1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60#include <linux/config.h>
61#include <linux/mm.h>
62#include <linux/sysctl.h>
63#include <net/tcp.h>
64#include <linux/ipsec.h>
65
/*
 * Initial value for sysctl_tcp_syncookies: 0 when the kernel is built
 * with CONFIG_SYSCTL, 1 otherwise (presumably because without sysctl
 * there is no runtime switch to turn it on -- confirm).
 */
#if defined(CONFIG_SYSCTL)
#define SYNC_INIT 0
#else
#define SYNC_INIT 1
#endif

/* Defined in another translation unit. */
extern int sysctl_tcp_fin_timeout;

/*
 * Option negotiation switches consulted by tcp_parse_options():
 * timestamps, window scaling and SACK are all enabled by default.
 */
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;

/* Remaining tunables; the last two start out zero (static storage). */
int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_stdurg;		/* read by tcp_check_urg() */
int sysctl_tcp_rfc1337;		/* read by tcp_timewait_state_process() */

/* Forward declaration: used by tcp_data() before its definition. */
static int prune_queue(struct sock *sk);
86
87
88
89
90
91
92
93
94
95
96
97static void tcp_delack_estimator(struct tcp_opt *tp)
98{
99 if(tp->ato == 0) {
100 tp->lrcvtime = jiffies;
101
102
103
104
105
106 tp->ato = 1;
107 tcp_enter_quickack_mode(tp);
108 } else {
109 int m = jiffies - tp->lrcvtime;
110
111 tp->lrcvtime = jiffies;
112 if(m <= 0)
113 m = 1;
114 if(m > tp->rto)
115 tp->ato = tp->rto;
116 else {
117
118
119
120 tp->ato = ((tp->ato << 1) >> 2) + m;
121 }
122 }
123}
124
125
126
127
128static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
129 struct sk_buff *skb)
130{
131 tp->delayed_acks++;
132
133
134
135
136 if(th->psh && (skb->len < (tp->mss_cache >> 1))) {
137
138 if((tp->ato & 0x7fffffff) > HZ/50)
139 tp->ato = ((tp->ato & 0x80000000) |
140 (HZ/50));
141 }
142}
143
144
145
146
147
148
149
150
151
152
153
154static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
155{
156 long m = mrtt;
157
158
159
160
161
162
163
164
165
166
167 if(m == 0)
168 m = 1;
169 if (tp->srtt != 0) {
170 m -= (tp->srtt >> 3);
171 tp->srtt += m;
172 if (m < 0)
173 m = -m;
174 m -= (tp->mdev >> 2);
175 tp->mdev += m;
176 } else {
177
178 tp->srtt = m<<3;
179 tp->mdev = m<<2;
180 }
181}
182
183
184
185
186
187static __inline__ void tcp_set_rto(struct tcp_opt *tp)
188{
189 tp->rto = (tp->srtt >> 3) + tp->mdev;
190 tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
191}
192
193
194
195
196
197
198
199
200
201
202
203
204static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
205{
206 if (tp->rto > 120*HZ)
207 tp->rto = 120*HZ;
208 if (tp->rto < HZ/5)
209 tp->rto = HZ/5;
210}
211
212
213extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp,
214 __u32 start_seq, __u32 end_seq)
215{
216
217
218
219
220
221
222
223
224
225
226 if (!before(end_seq, tp->last_ack_sent - sk->rcvbuf) &&
227 !after(start_seq, tp->rcv_wup + tp->rcv_wnd)) {
228
229
230
231
232 if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) {
233 tp->ts_recent = tp->rcv_tsval;
234 tp->ts_recent_stamp = jiffies;
235 }
236 }
237}
238
239#define PAWS_24DAYS (HZ * 60 * 60 * 24 * 24)
240
241extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, unsigned len)
242{
243
244 return (((s32)(jiffies - tp->ts_recent_stamp) >= PAWS_24DAYS) ||
245 (((s32)(tp->rcv_tsval - tp->ts_recent) < 0) &&
246
247 (len != (th->doff * 4))));
248}
249
250
251static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
252{
253 u32 end_window = tp->rcv_wup + tp->rcv_wnd;
254
255 if (tp->rcv_wnd &&
256 after(end_seq, tp->rcv_nxt) &&
257 before(seq, end_window))
258 return 1;
259 if (seq != end_window)
260 return 0;
261 return (seq == end_seq);
262}
263
264
265extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
266{
267 if (seq == tp->rcv_nxt)
268 return (tp->rcv_wnd || (end_seq == seq));
269
270 return __tcp_sequence(tp, seq, end_seq);
271}
272
273
/*
 * Tear the connection down in response to a reset.
 *
 * Marks the socket zapped, picks sk->err from the state the socket was
 * in (SYN_SENT -> ECONNREFUSED, CLOSE_WAIT -> EPIPE, anything else ->
 * ECONNRESET), moves it to TCP_CLOSE with both directions shut down and
 * notifies the owner through state_change() unless the socket is dead.
 */
static void tcp_reset(struct sock *sk)
{
	sk->zapped = 1;

	if (sk->state == TCP_SYN_SENT)
		sk->err = ECONNREFUSED;
	else if (sk->state == TCP_CLOSE_WAIT)
		sk->err = EPIPE;
	else
		sk->err = ECONNRESET;

	tcp_set_state(sk, TCP_CLOSE);
	sk->shutdown = SHUTDOWN_MASK;
	if (!sk->dead)
		sk->state_change(sk);
}
294
295
296static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, int nsacks)
297{
298 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
299 int i = nsacks;
300
301 while(i--) {
302 struct sk_buff *skb = skb_peek(&sk->write_queue);
303 __u32 start_seq = ntohl(sp->start_seq);
304 __u32 end_seq = ntohl(sp->end_seq);
305 int fack_count = 0;
306
307 while((skb != NULL) &&
308 (skb != tp->send_head) &&
309 (skb != (struct sk_buff *)&sk->write_queue)) {
310
311
312
313 if(after(TCP_SKB_CB(skb)->seq, end_seq))
314 break;
315
316
317
318
319 fack_count++;
320 if(!after(start_seq, TCP_SKB_CB(skb)->seq) &&
321 !before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
322
323 if((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
324 tp->retrans_out)
325 tp->retrans_out--;
326 TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
327
328
329
330
331 if(fack_count > tp->fackets_out)
332 tp->fackets_out = fack_count;
333 }
334 skb = skb->next;
335 }
336 sp++;
337 }
338}
339
340
341
342
343
344void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
345{
346 unsigned char *ptr;
347 int length=(th->doff*4)-sizeof(struct tcphdr);
348 int saw_mss = 0;
349
350 ptr = (unsigned char *)(th + 1);
351 tp->saw_tstamp = 0;
352
353 while(length>0) {
354 int opcode=*ptr++;
355 int opsize;
356
357 switch (opcode) {
358 case TCPOPT_EOL:
359 return;
360 case TCPOPT_NOP:
361 length--;
362 continue;
363 default:
364 opsize=*ptr++;
365 if (opsize < 2)
366 return;
367 if (opsize > length)
368 break;
369 switch(opcode) {
370 case TCPOPT_MSS:
371 if(opsize==TCPOLEN_MSS && th->syn) {
372 u16 in_mss = ntohs(*(__u16 *)ptr);
373 if (in_mss == 0)
374 in_mss = 536;
375 if (tp->mss_clamp > in_mss)
376 tp->mss_clamp = in_mss;
377 saw_mss = 1;
378 }
379 break;
380 case TCPOPT_WINDOW:
381 if(opsize==TCPOLEN_WINDOW && th->syn)
382 if (!no_fancy && sysctl_tcp_window_scaling) {
383 tp->wscale_ok = 1;
384 tp->snd_wscale = *(__u8 *)ptr;
385 if(tp->snd_wscale > 14) {
386 if(net_ratelimit())
387 printk("tcp_parse_options: Illegal window "
388 "scaling value %d >14 received.",
389 tp->snd_wscale);
390 tp->snd_wscale = 14;
391 }
392 }
393 break;
394 case TCPOPT_TIMESTAMP:
395 if(opsize==TCPOLEN_TIMESTAMP) {
396 if (sysctl_tcp_timestamps && !no_fancy) {
397 tp->tstamp_ok = 1;
398 tp->saw_tstamp = 1;
399 tp->rcv_tsval = ntohl(*(__u32 *)ptr);
400 tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
401 }
402 }
403 break;
404 case TCPOPT_SACK_PERM:
405 if(opsize==TCPOLEN_SACK_PERM && th->syn) {
406 if (sysctl_tcp_sack && !no_fancy) {
407 tp->sack_ok = 1;
408 tp->num_sacks = 0;
409 }
410 }
411 break;
412
413 case TCPOPT_SACK:
414 if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
415 sysctl_tcp_sack && (sk != NULL) && !th->syn) {
416 int sack_bytes = opsize - TCPOLEN_SACK_BASE;
417
418 if(!(sack_bytes % TCPOLEN_SACK_PERBLOCK)) {
419 int num_sacks = sack_bytes >> 3;
420 struct tcp_sack_block *sackp;
421
422 sackp = (struct tcp_sack_block *)ptr;
423 tcp_sacktag_write_queue(sk, sackp, num_sacks);
424 }
425 }
426 };
427 ptr+=opsize-2;
428 length-=opsize;
429 };
430 }
431 if(th->syn && saw_mss == 0)
432 tp->mss_clamp = 536;
433}
434
435
436
437
438static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp)
439{
440
441 if (tp->tcp_header_len == sizeof(struct tcphdr))
442 return 0;
443 if (th->doff == sizeof(struct tcphdr)>>2) {
444 tp->saw_tstamp = 0;
445 return 0;
446 } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
447 __u32 *ptr = (__u32 *)(th + 1);
448 if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
449 | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
450 tp->saw_tstamp = 1;
451 tp->rcv_tsval = ntohl(*++ptr);
452 tp->rcv_tsecr = ntohl(*++ptr);
453 return 1;
454 }
455 }
456 tcp_parse_options(sk, th, tp, 0);
457 return 1;
458}
459
460#define FLAG_DATA 0x01
461#define FLAG_WIN_UPDATE 0x02
462#define FLAG_DATA_ACKED 0x04
463#define FLAG_RETRANS_DATA_ACKED 0x08
464
465static __inline__ void clear_fast_retransmit(struct tcp_opt *tp)
466{
467 if (tp->dup_acks > 3)
468 tp->snd_cwnd = (tp->snd_ssthresh);
469
470 tp->dup_acks = 0;
471}
472
473
474
475
476static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
477{
478 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
479
480
481
482
483
484
485
486
487
488
489
490 if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) {
491
492
493
494
495
496
497 if (tp->high_seq == 0 || after(ack, tp->high_seq)) {
498 tp->dup_acks++;
499 if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) {
500 tp->snd_ssthresh =
501 max(min(tp->snd_wnd, tp->snd_cwnd) >> 1, 2);
502 tp->snd_cwnd = (tp->snd_ssthresh + 3);
503 tp->high_seq = tp->snd_nxt;
504 if(!tp->fackets_out)
505 tcp_retransmit_skb(sk,
506 skb_peek(&sk->write_queue));
507 else
508 tcp_fack_retransmit(sk);
509 tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
510 }
511 } else if (++tp->dup_acks > 3) {
512
513
514
515
516
517
518
519
520 if(!tp->fackets_out) {
521 tp->snd_cwnd++;
522 } else {
523
524
525
526
527
528
529
530
531 tcp_fack_retransmit(sk);
532 }
533 }
534 } else if (tp->high_seq != 0) {
535
536
537
538
539 if (!before(ack, tp->high_seq) || (not_dup & FLAG_DATA) != 0) {
540
541
542
543
544
545
546
547 clear_fast_retransmit(tp);
548
549
550
551
552 if (!before(ack, tp->high_seq)) {
553 tp->high_seq = 0;
554 tp->fackets_out = 0;
555 }
556 } else if (tp->dup_acks >= 3) {
557 if (!tp->fackets_out) {
558
559
560
561
562
563
564
565
566
567
568
569
570
571 if (ack != tp->snd_una && before(ack, tp->high_seq)) {
572 tcp_retransmit_skb(sk,
573 skb_peek(&sk->write_queue));
574 tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
575 }
576 } else {
577
578
579
580 tcp_fack_retransmit(sk);
581 }
582 }
583 }
584}
585
586
587
588
589static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
590{
591 if (tp->snd_cwnd <= tp->snd_ssthresh) {
592
593 tp->snd_cwnd++;
594 } else {
595
596
597
598 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
599 tp->snd_cwnd++;
600 tp->snd_cwnd_cnt=0;
601 } else
602 tp->snd_cwnd_cnt++;
603 }
604}
605
606
607static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
608 __u32 *seq, __u32 *seq_rtt)
609{
610 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
611 struct sk_buff *skb;
612 unsigned long now = jiffies;
613 int acked = 0;
614
615
616
617
618 if (tp->retrans_head != NULL &&
619 !before(ack, TCP_SKB_CB(tp->retrans_head)->end_seq))
620 tp->retrans_head = NULL;
621
622 while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
623 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
624 __u8 sacked = scb->sacked;
625
626
627
628
629
630 if (after(scb->end_seq, ack))
631 break;
632
633
634
635
636
637
638
639
640 if((sacked & TCPCB_SACKED_RETRANS) && tp->retrans_out)
641 tp->retrans_out--;
642 if(!(scb->flags & TCPCB_FLAG_SYN)) {
643 acked |= FLAG_DATA_ACKED;
644 if(sacked & TCPCB_SACKED_RETRANS)
645 acked |= FLAG_RETRANS_DATA_ACKED;
646 if(tp->fackets_out)
647 tp->fackets_out--;
648 } else {
649
650 tp->retrans_head = NULL;
651 }
652 tp->packets_out--;
653 *seq = scb->seq;
654 *seq_rtt = now - scb->when;
655 __skb_unlink(skb, skb->list);
656 kfree_skb(skb);
657 }
658 return acked;
659}
660
661static void tcp_ack_probe(struct sock *sk, __u32 ack)
662{
663 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
664
665
666 tp->probes_out = 0;
667
668
669
670
671 if (tp->send_head != NULL &&
672 !before (ack + tp->snd_wnd, TCP_SKB_CB(tp->send_head)->end_seq)) {
673 tp->backoff = 0;
674 tp->pending = 0;
675 tcp_clear_xmit_timer(sk, TIME_PROBE0);
676 } else {
677 tcp_reset_xmit_timer(sk, TIME_PROBE0,
678 min(tp->rto << tp->backoff, 120*HZ));
679 }
680}
681
682
683static __inline__ int should_advance_cwnd(struct tcp_opt *tp, int flag)
684{
685
686 if ((flag & FLAG_DATA_ACKED) == 0)
687 return 0;
688
689
690 if ((flag & FLAG_RETRANS_DATA_ACKED) != 0) {
691
692
693
694 if (tp->fackets_out != 0 ||
695 tp->retransmits != 0)
696 return 1;
697
698
699
700
701
702 return 0;
703 }
704
705
706 return 1;
707}
708
709
710
711
712static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
713 u32 seq, u32 ack, int flag)
714{
715 __u32 seq_rtt;
716
717
718
719
720
721
722
723
724
725 if (!(flag & FLAG_DATA_ACKED))
726 return;
727
728 seq_rtt = jiffies-tp->rcv_tsecr;
729 tcp_rtt_estimator(tp, seq_rtt);
730 if (tp->retransmits) {
731 if (tp->packets_out == 0) {
732 tp->retransmits = 0;
733 tp->fackets_out = 0;
734 tp->retrans_out = 0;
735 tp->backoff = 0;
736 tcp_set_rto(tp);
737 } else {
738
739 tcp_set_rto(tp);
740 tp->rto = tp->rto << tp->backoff;
741 }
742 } else {
743 tcp_set_rto(tp);
744 }
745
746 tcp_bound_rto(tp);
747}
748
749static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
750{
751 struct sk_buff *skb = skb_peek(&sk->write_queue);
752 long when = tp->rto - (jiffies - TCP_SKB_CB(skb)->when);
753
754
755
756
757
758 if (tp->retransmits) {
759 tcp_xmit_retransmit_queue(sk);
760 tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
761 } else {
762 tcp_reset_xmit_timer(sk, TIME_RETRANS, when);
763 }
764}
765
766
767static int tcp_ack(struct sock *sk, struct tcphdr *th,
768 u32 ack_seq, u32 ack, int len)
769{
770 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
771 int flag = 0;
772 u32 seq = 0;
773 u32 seq_rtt = 0;
774
775 if(sk->zapped)
776 return(1);
777
778 if (tp->pending == TIME_KEEPOPEN)
779 tp->probes_out = 0;
780
781 tp->rcv_tstamp = jiffies;
782
783
784
785
786 if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
787 goto uninteresting_ack;
788
789 dst_confirm(sk->dst_cache);
790
791
792 if (len != th->doff*4) {
793 flag |= FLAG_DATA;
794 tcp_delack_estimator(tp);
795 }
796
797
798
799
800
801
802
803 if (before(tp->snd_wl1, ack_seq) ||
804 (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) {
805 u32 nwin = ntohs(th->window) << tp->snd_wscale;
806
807 if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) {
808 flag |= FLAG_WIN_UPDATE;
809 tp->snd_wnd = nwin;
810
811 tp->snd_wl1 = ack_seq;
812 tp->snd_wl2 = ack;
813
814 if (nwin > tp->max_window)
815 tp->max_window = nwin;
816 }
817 }
818
819
820
821
822 sk->err_soft = 0;
823
824
825
826
827
828 if (tp->pending == TIME_PROBE0)
829 tcp_ack_probe(sk, ack);
830
831
832 flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);
833
834
835
836
837 if (should_advance_cwnd(tp, flag))
838 tcp_cong_avoid(tp);
839
840
841 if (tp->saw_tstamp) {
842 tcp_ack_saw_tstamp(sk, tp, seq, ack, flag);
843 } else {
844
845 if (tp->retransmits) {
846 if (tp->packets_out == 0) {
847 tp->retransmits = 0;
848 tp->fackets_out = 0;
849 tp->retrans_out = 0;
850 }
851 } else {
852
853
854
855
856
857
858
859
860 if (flag & FLAG_DATA_ACKED) {
861 if(!(flag & FLAG_RETRANS_DATA_ACKED)) {
862 tp->backoff = 0;
863 tcp_rtt_estimator(tp, seq_rtt);
864 tcp_set_rto(tp);
865 tcp_bound_rto(tp);
866 }
867 }
868 }
869 }
870
871 if (tp->packets_out) {
872 if (flag & FLAG_DATA_ACKED)
873 tcp_ack_packets_out(sk, tp);
874 } else {
875 tcp_clear_xmit_timer(sk, TIME_RETRANS);
876 }
877
878 flag &= (FLAG_DATA | FLAG_WIN_UPDATE);
879 if ((ack == tp->snd_una && tp->packets_out && flag == 0) ||
880 (tp->high_seq != 0)) {
881 tcp_fast_retrans(sk, ack, flag);
882 } else {
883
884 tp->dup_acks = 0;
885 }
886
887 tp->snd_una = ack;
888 return 1;
889
890uninteresting_ack:
891 SOCK_DEBUG(sk, "Ack ignored %u %u\n", ack, tp->snd_nxt);
892 return 0;
893}
894
895
896extern void tcp_tw_schedule(struct tcp_tw_bucket *tw);
897extern void tcp_tw_reschedule(struct tcp_tw_bucket *tw);
898extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
899
900void tcp_timewait_kill(struct tcp_tw_bucket *tw)
901{
902
903 if(tw->bind_next)
904 tw->bind_next->bind_pprev = tw->bind_pprev;
905 *(tw->bind_pprev) = tw->bind_next;
906 if(tw->tb->owners == NULL)
907 tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
908
909 if(tw->next)
910 tw->next->pprev = tw->pprev;
911 *tw->pprev = tw->next;
912
913
914
915
916 tw->sklist_next->sklist_prev = tw->sklist_prev;
917 tw->sklist_prev->sklist_next = tw->sklist_next;
918
919
920 kmem_cache_free(tcp_timewait_cachep, tw);
921}
922
923
924
925
926
927
928
929int tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
930 struct tcphdr *th, unsigned len)
931{
932
933
934
935
936
937
938
939
940
941
942
943
944
945 if(th->syn && !th->rst && after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt)) {
946 struct sock *sk;
947 struct tcp_func *af_specific = tw->af_specific;
948 __u32 isn;
949
950 isn = tw->rcv_nxt + 128000;
951 if(isn == 0)
952 isn++;
953 tcp_tw_deschedule(tw);
954 tcp_timewait_kill(tw);
955 sk = af_specific->get_sock(skb, th);
956 if(sk == NULL ||
957 !ipsec_sk_policy(sk,skb) ||
958 atomic_read(&sk->sock_readers) != 0)
959 return 0;
960 skb_set_owner_r(skb, sk);
961 af_specific = sk->tp_pinfo.af_tcp.af_specific;
962 if(af_specific->conn_request(sk, skb, isn) < 0)
963 return 1;
964 return 0;
965 }
966
967
968 if(th->rst || th->syn) {
969
970
971
972
973 if(sysctl_tcp_rfc1337 == 0) {
974 tcp_tw_deschedule(tw);
975 tcp_timewait_kill(tw);
976 }
977 if(!th->rst)
978 return 1;
979 } else {
980
981 if(th->ack)
982 tcp_tw_reschedule(tw);
983 }
984 return 0;
985}
986
987
988
989
990
991
992static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
993{
994 struct sock **head, *sktw;
995
996
997 if(sk->next)
998 sk->next->pprev = sk->pprev;
999 *sk->pprev = sk->next;
1000 sk->pprev = NULL;
1001 tcp_reg_zap(sk);
1002
1003
1004 tw->tb = (struct tcp_bind_bucket *)sk->prev;
1005 if((tw->bind_next = sk->bind_next) != NULL)
1006 sk->bind_next->bind_pprev = &tw->bind_next;
1007 tw->bind_pprev = sk->bind_pprev;
1008 *sk->bind_pprev = (struct sock *)tw;
1009
1010
1011 (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
1012 (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
1013 sk->sklist_next = NULL;
1014 sk->prot->inuse--;
1015
1016
1017 head = &tcp_established_hash[sk->hashent + (TCP_HTABLE_SIZE/2)];
1018 sktw = (struct sock *)tw;
1019 if((sktw->next = *head) != NULL)
1020 (*head)->pprev = &sktw->next;
1021 *head = sktw;
1022 sktw->pprev = head;
1023}
1024
1025void tcp_time_wait(struct sock *sk)
1026{
1027 struct tcp_tw_bucket *tw;
1028
1029 tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
1030 if(tw != NULL) {
1031
1032 tw->daddr = sk->daddr;
1033 tw->rcv_saddr = sk->rcv_saddr;
1034 tw->bound_dev_if= sk->bound_dev_if;
1035 tw->num = sk->num;
1036 tw->state = TCP_TIME_WAIT;
1037 tw->sport = sk->sport;
1038 tw->dport = sk->dport;
1039 tw->family = sk->family;
1040 tw->reuse = sk->reuse;
1041 tw->rcv_nxt = sk->tp_pinfo.af_tcp.rcv_nxt;
1042 tw->af_specific = sk->tp_pinfo.af_tcp.af_specific;
1043
1044#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1045 if(tw->family == PF_INET6) {
1046 memcpy(&tw->v6_daddr,
1047 &sk->net_pinfo.af_inet6.daddr,
1048 sizeof(struct in6_addr));
1049 memcpy(&tw->v6_rcv_saddr,
1050 &sk->net_pinfo.af_inet6.rcv_saddr,
1051 sizeof(struct in6_addr));
1052 }
1053#endif
1054
1055 tcp_tw_hashdance(sk, tw);
1056
1057
1058 tcp_tw_schedule(tw);
1059
1060
1061 if(sk->state == TCP_ESTABLISHED)
1062 tcp_statistics.TcpCurrEstab--;
1063 sk->state = TCP_CLOSE;
1064 net_reset_timer(sk, TIME_DONE,
1065 min(sk->tp_pinfo.af_tcp.srtt * 2, TCP_DONE_TIME));
1066 } else {
1067
1068
1069
1070
1071 tcp_set_state(sk, TCP_CLOSE);
1072 }
1073
1074
1075 sk->shutdown = SHUTDOWN_MASK;
1076 if(!sk->dead)
1077 sk->state_change(sk);
1078}
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
1096{
1097 sk->tp_pinfo.af_tcp.fin_seq = TCP_SKB_CB(skb)->end_seq;
1098
1099 tcp_send_ack(sk);
1100
1101 if (!sk->dead) {
1102 sk->state_change(sk);
1103 sock_wake_async(sk->socket, 1);
1104 }
1105
1106 switch(sk->state) {
1107 case TCP_SYN_RECV:
1108 case TCP_ESTABLISHED:
1109
1110 tcp_set_state(sk, TCP_CLOSE_WAIT);
1111 if (th->rst)
1112 sk->shutdown = SHUTDOWN_MASK;
1113 break;
1114
1115 case TCP_CLOSE_WAIT:
1116 case TCP_CLOSING:
1117
1118
1119
1120 break;
1121 case TCP_LAST_ACK:
1122
1123 break;
1124
1125 case TCP_FIN_WAIT1:
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136 tcp_set_state(sk, TCP_CLOSING);
1137 break;
1138 case TCP_FIN_WAIT2:
1139
1140 tcp_time_wait(sk);
1141 break;
1142 default:
1143
1144
1145
1146 printk("tcp_fin: Impossible, sk->state=%d\n", sk->state);
1147 break;
1148 };
1149}
1150
1151
1152
1153
1154static void tcp_sack_maybe_coalesce(struct tcp_opt *tp, struct tcp_sack_block *sp)
1155{
1156 int this_sack, num_sacks = tp->num_sacks;
1157 struct tcp_sack_block *swalk = &tp->selective_acks[0];
1158
1159
1160
1161
1162 if(num_sacks != 1) {
1163 for(this_sack = 0; this_sack < num_sacks; this_sack++, swalk++) {
1164 if(swalk == sp)
1165 continue;
1166
1167
1168
1169
1170 if(between(sp->start_seq, swalk->start_seq, swalk->end_seq)) {
1171 sp->start_seq = swalk->start_seq;
1172 goto coalesce;
1173 }
1174
1175
1176
1177 if(between(sp->end_seq, swalk->start_seq, swalk->end_seq)) {
1178 sp->end_seq = swalk->end_seq;
1179 goto coalesce;
1180 }
1181 }
1182 }
1183
1184 return;
1185
1186coalesce:
1187
1188
1189
1190 for(; this_sack < num_sacks-1; this_sack++, swalk++) {
1191 struct tcp_sack_block *next = (swalk + 1);
1192 swalk->start_seq = next->start_seq;
1193 swalk->end_seq = next->end_seq;
1194 }
1195 tp->num_sacks--;
1196}
1197
1198static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
1199{
1200 __u32 tmp;
1201
1202 tmp = sack1->start_seq;
1203 sack1->start_seq = sack2->start_seq;
1204 sack2->start_seq = tmp;
1205
1206 tmp = sack1->end_seq;
1207 sack1->end_seq = sack2->end_seq;
1208 sack2->end_seq = tmp;
1209}
1210
1211static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
1212{
1213 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1214 struct tcp_sack_block *sp = &tp->selective_acks[0];
1215 int cur_sacks = tp->num_sacks;
1216
1217 if (!cur_sacks)
1218 goto new_sack;
1219
1220
1221
1222
1223
1224 if(sp->end_seq == TCP_SKB_CB(skb)->seq) {
1225 sp->end_seq = TCP_SKB_CB(skb)->end_seq;
1226 tcp_sack_maybe_coalesce(tp, sp);
1227 } else if(sp->start_seq == TCP_SKB_CB(skb)->end_seq) {
1228
1229
1230
1231 sp->start_seq = TCP_SKB_CB(skb)->seq;
1232 tcp_sack_maybe_coalesce(tp, sp);
1233 } else {
1234 struct tcp_sack_block *swap = sp + 1;
1235 int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);
1236
1237
1238
1239
1240
1241 for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
1242 if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
1243 (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
1244 if(swap->end_seq == TCP_SKB_CB(skb)->seq)
1245 swap->end_seq = TCP_SKB_CB(skb)->end_seq;
1246 else
1247 swap->start_seq = TCP_SKB_CB(skb)->seq;
1248 tcp_sack_swap(sp, swap);
1249 tcp_sack_maybe_coalesce(tp, sp);
1250 return;
1251 }
1252 }
1253
1254
1255
1256
1257
1258
1259
1260 if (cur_sacks >= max_sacks) {
1261 cur_sacks--;
1262 tp->num_sacks--;
1263 }
1264 while(cur_sacks >= 1) {
1265 struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
1266 struct tcp_sack_block *prev = (this - 1);
1267 this->start_seq = prev->start_seq;
1268 this->end_seq = prev->end_seq;
1269 cur_sacks--;
1270 }
1271
1272 new_sack:
1273
1274 sp->start_seq = TCP_SKB_CB(skb)->seq;
1275 sp->end_seq = TCP_SKB_CB(skb)->end_seq;
1276 tp->num_sacks++;
1277 }
1278}
1279
1280static void tcp_sack_remove_skb(struct tcp_opt *tp, struct sk_buff *skb)
1281{
1282 struct tcp_sack_block *sp = &tp->selective_acks[0];
1283 int num_sacks = tp->num_sacks;
1284 int this_sack;
1285
1286
1287
1288
1289
1290 for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
1291
1292 if(!before(sp->start_seq, TCP_SKB_CB(skb)->seq) &&
1293 before(sp->start_seq, TCP_SKB_CB(skb)->end_seq))
1294 break;
1295 }
1296
1297
1298
1299
1300
1301 if(this_sack >= num_sacks)
1302 return;
1303
1304 sp->start_seq = TCP_SKB_CB(skb)->end_seq;
1305 if(!before(sp->start_seq, sp->end_seq)) {
1306
1307 for(this_sack += 1; this_sack < num_sacks; this_sack++, sp++) {
1308 struct tcp_sack_block *next = (sp + 1);
1309 sp->start_seq = next->start_seq;
1310 sp->end_seq = next->end_seq;
1311 }
1312 tp->num_sacks--;
1313 }
1314}
1315
1316static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct sk_buff *new_skb)
1317{
1318 struct tcp_sack_block *sp = &tp->selective_acks[0];
1319 int num_sacks = tp->num_sacks;
1320 int this_sack;
1321
1322 for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
1323 if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq)
1324 break;
1325 }
1326 if(this_sack >= num_sacks)
1327 return;
1328 sp->end_seq = TCP_SKB_CB(new_skb)->end_seq;
1329}
1330
1331
1332
1333
1334static void tcp_ofo_queue(struct sock *sk)
1335{
1336 struct sk_buff *skb;
1337 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1338
1339 while ((skb = skb_peek(&tp->out_of_order_queue))) {
1340 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
1341 break;
1342
1343 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
1344 SOCK_DEBUG(sk, "ofo packet was already received \n");
1345 __skb_unlink(skb, skb->list);
1346 kfree_skb(skb);
1347 continue;
1348 }
1349 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
1350 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
1351 TCP_SKB_CB(skb)->end_seq);
1352
1353 if(tp->sack_ok)
1354 tcp_sack_remove_skb(tp, skb);
1355 __skb_unlink(skb, skb->list);
1356 __skb_queue_tail(&sk->receive_queue, skb);
1357 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1358 if(skb->h.th->fin)
1359 tcp_fin(skb, sk, skb->h.th);
1360 }
1361}
1362
1363static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
1364{
1365 struct sk_buff *skb1;
1366 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1367
1368
1369
1370
1371
1372 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
1373
1374 queue_and_out:
1375 dst_confirm(sk->dst_cache);
1376 __skb_queue_tail(&sk->receive_queue, skb);
1377 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1378 if(skb->h.th->fin) {
1379 tcp_fin(skb, sk, skb->h.th);
1380 } else {
1381 tcp_remember_ack(tp, skb->h.th, skb);
1382 }
1383
1384 if(tp->sack_ok && tp->num_sacks)
1385 tcp_sack_remove_skb(tp, skb);
1386 tcp_ofo_queue(sk);
1387
1388
1389 if (skb_queue_len(&tp->out_of_order_queue) == 0)
1390 tp->pred_flags = htonl(((tp->tcp_header_len >> 2) << 28) |
1391 (0x10 << 16) |
1392 tp->snd_wnd);
1393 return;
1394 }
1395
1396
1397 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
1398
1399 SOCK_DEBUG(sk, "retransmit received: seq %X\n", TCP_SKB_CB(skb)->seq);
1400 tcp_enter_quickack_mode(tp);
1401 kfree_skb(skb);
1402 return;
1403 }
1404
1405 if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
1406
1407 SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
1408 tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
1409 TCP_SKB_CB(skb)->end_seq);
1410
1411 goto queue_and_out;
1412 }
1413
1414
1415 tp->delayed_acks++;
1416 tcp_enter_quickack_mode(tp);
1417
1418
1419 tp->pred_flags = 0;
1420
1421 SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
1422 tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
1423
1424 if (skb_peek(&tp->out_of_order_queue) == NULL) {
1425
1426 if(tp->sack_ok) {
1427 tp->num_sacks = 1;
1428 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
1429 tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq;
1430 }
1431 __skb_queue_head(&tp->out_of_order_queue,skb);
1432 } else {
1433 for(skb1=tp->out_of_order_queue.prev; ; skb1 = skb1->prev) {
1434
1435 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb1)->seq) {
1436 if (skb->len >= skb1->len) {
1437 if(tp->sack_ok)
1438 tcp_sack_extend(tp, skb1, skb);
1439 __skb_append(skb1, skb);
1440 __skb_unlink(skb1, skb1->list);
1441 kfree_skb(skb1);
1442 } else {
1443
1444
1445
1446 kfree_skb(skb);
1447 }
1448 break;
1449 }
1450
1451 if (after(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) {
1452 __skb_append(skb1, skb);
1453 if(tp->sack_ok)
1454 tcp_sack_new_ofo_skb(sk, skb);
1455 break;
1456 }
1457
1458
1459 if (skb1 == skb_peek(&tp->out_of_order_queue)) {
1460 __skb_queue_head(&tp->out_of_order_queue,skb);
1461 if(tp->sack_ok)
1462 tcp_sack_new_ofo_skb(sk, skb);
1463 break;
1464 }
1465 }
1466 }
1467}
1468
1469
1470
1471
1472
1473
1474
1475
1476static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
1477{
1478 struct tcphdr *th;
1479 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1480
1481 th = skb->h.th;
1482 skb_pull(skb, th->doff*4);
1483 skb_trim(skb, len - (th->doff*4));
1484
1485 if (skb->len == 0 && !th->fin)
1486 return(0);
1487
1488
1489
1490
1491
1492
1493 if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
1494 if (prune_queue(sk) < 0) {
1495
1496
1497
1498 return 0;
1499 }
1500 }
1501
1502 tcp_data_queue(sk, skb);
1503
1504 if (before(tp->rcv_nxt, tp->copied_seq)) {
1505 printk(KERN_DEBUG "*** tcp.c:tcp_data bug acked < copied\n");
1506 tp->rcv_nxt = tp->copied_seq;
1507 }
1508
1509
1510
1511
1512 if (!sk->dead) {
1513 SOCK_DEBUG(sk, "Data wakeup.\n");
1514 sk->data_ready(sk,0);
1515 }
1516 return(1);
1517}
1518
1519static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
1520{
1521 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1522
1523 if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
1524 tcp_packets_in_flight(tp) < tp->snd_cwnd) {
1525
1526 tcp_write_xmit(sk);
1527 } else if (tp->packets_out == 0 && !tp->pending) {
1528
1529 tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
1530 }
1531}
1532
1533static __inline__ void tcp_data_snd_check(struct sock *sk)
1534{
1535 struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;
1536
1537 if (skb != NULL)
1538 __tcp_data_snd_check(sk, skb);
1539}
1540
1541
1542
1543
1544
1545static __inline__ void tcp_measure_rcv_mss(struct sock *sk, struct sk_buff *skb)
1546{
1547 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1548 unsigned int len = skb->len, lss;
1549
1550 if (len > tp->rcv_mss)
1551 tp->rcv_mss = len;
1552 lss = tp->last_seg_size;
1553 tp->last_seg_size = 0;
1554 if (len >= 536) {
1555 if (len == lss)
1556 tp->rcv_mss = len;
1557 tp->last_seg_size = len;
1558 }
1559}
1560
1561
1562
1563
1564static __inline__ void __tcp_ack_snd_check(struct sock *sk)
1565{
1566 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583 if (((tp->rcv_nxt - tp->rcv_wup) >= tp->rcv_mss * MAX_DELAY_ACK) ||
1584
1585 tcp_raise_window(sk) ||
1586
1587 tcp_in_quickack_mode(tp) ||
1588
1589 (skb_peek(&tp->out_of_order_queue) != NULL)) {
1590
1591 tcp_send_ack(sk);
1592 } else {
1593
1594 tcp_send_delayed_ack(tp, HZ/2);
1595 }
1596}
1597
1598static __inline__ void tcp_ack_snd_check(struct sock *sk)
1599{
1600 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1601 if (tp->delayed_acks == 0) {
1602
1603 return;
1604 }
1605 __tcp_ack_snd_check(sk);
1606}
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
1620{
1621 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1622 u32 ptr = ntohs(th->urg_ptr);
1623
1624 if (ptr && !sysctl_tcp_stdurg)
1625 ptr--;
1626 ptr += ntohl(th->seq);
1627
1628
1629 if (after(tp->copied_seq, ptr))
1630 return;
1631
1632
1633 if (tp->urg_data && !after(ptr, tp->urg_seq))
1634 return;
1635
1636
1637 if (sk->proc != 0) {
1638 if (sk->proc > 0)
1639 kill_proc(sk->proc, SIGURG, 1);
1640 else
1641 kill_pg(-sk->proc, SIGURG, 1);
1642 }
1643
1644
1645
1646
1647
1648
1649
1650 if (tp->urg_seq == tp->copied_seq)
1651 tp->copied_seq++;
1652 tp->urg_data = URG_NOTYET;
1653 tp->urg_seq = ptr;
1654
1655
1656 tp->pred_flags = 0;
1657}
1658
1659
1660static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len)
1661{
1662 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1663
1664
1665 if (th->urg)
1666 tcp_check_urg(sk,th);
1667
1668
1669 if (tp->urg_data == URG_NOTYET) {
1670 u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4);
1671
1672
1673 if (ptr < len) {
1674 tp->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
1675 if (!sk->dead)
1676 sk->data_ready(sk,0);
1677 }
1678 }
1679}
1680
1681
1682
1683
1684
1685
1686
1687
1688static int prune_queue(struct sock *sk)
1689{
1690 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
1691 struct sk_buff * skb;
1692
1693 SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
1694
1695 net_statistics.PruneCalled++;
1696
1697
1698 skb = __skb_dequeue_tail(&tp->out_of_order_queue);
1699 if(skb != NULL) {
1700
1701 do { net_statistics.OfoPruned += skb->len;
1702 kfree_skb(skb);
1703 skb = __skb_dequeue_tail(&tp->out_of_order_queue);
1704 } while(skb != NULL);
1705
1706
1707
1708
1709
1710
1711 if(tp->sack_ok)
1712 tp->num_sacks = 0;
1713 }
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736 if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1))
1737 return 0;
1738
1739
1740 return -1;
1741}
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
1766 struct tcphdr *th, unsigned len)
1767{
1768 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1769 int queued;
1770 u32 flg;
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790 if (tcp_fast_parse_options(sk, th, tp)) {
1791 if (tp->saw_tstamp) {
1792 if (tcp_paws_discard(tp, th, len)) {
1793 tcp_statistics.TcpInErrs++;
1794 if (!th->rst) {
1795 tcp_send_ack(sk);
1796 goto discard;
1797 }
1798 }
1799 tcp_replace_ts_recent(sk, tp,
1800 TCP_SKB_CB(skb)->seq,
1801 TCP_SKB_CB(skb)->end_seq);
1802 }
1803 }
1804
1805 flg = *(((u32 *)th) + 3) & ~htonl(0x8 << 16);
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816 if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
1817 if (len <= th->doff*4) {
1818
1819 if (len == th->doff*4) {
1820 tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
1821 TCP_SKB_CB(skb)->ack_seq, len);
1822 kfree_skb(skb);
1823 tcp_data_snd_check(sk);
1824 return 0;
1825 } else {
1826 tcp_statistics.TcpInErrs++;
1827 goto discard;
1828 }
1829 } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una &&
1830 atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
1831
1832 __skb_pull(skb,th->doff*4);
1833
1834 tcp_measure_rcv_mss(sk, skb);
1835
1836
1837
1838
1839 __skb_queue_tail(&sk->receive_queue, skb);
1840 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
1841
1842
1843
1844
1845 sk->data_ready(sk, 0);
1846 tcp_delack_estimator(tp);
1847
1848 tcp_remember_ack(tp, th, skb);
1849
1850 __tcp_ack_snd_check(sk);
1851 return 0;
1852 }
1853 }
1854
1855
1856
1857
1858
1859 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
1860
1861
1862
1863
1864
1865
1866 if (th->rst)
1867 goto discard;
1868 if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
1869 SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n",
1870 TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
1871 tp->rcv_wup, tp->rcv_wnd);
1872 }
1873 tcp_send_ack(sk);
1874 goto discard;
1875 }
1876
1877 if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
1878 SOCK_DEBUG(sk, "syn in established state\n");
1879 tcp_statistics.TcpInErrs++;
1880 tcp_reset(sk);
1881 return 1;
1882 }
1883
1884 if(th->rst) {
1885 tcp_reset(sk);
1886 goto discard;
1887 }
1888
1889 if(th->ack)
1890 tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->ack_seq, len);
1891
1892
1893 tcp_urg(sk, th, len);
1894
1895
1896 queued = tcp_data(skb, sk, len);
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915 tcp_measure_rcv_mss(sk, skb);
1916
1917
1918 if(sk->state != TCP_CLOSE) {
1919 tcp_data_snd_check(sk);
1920 tcp_ack_snd_check(sk);
1921 }
1922
1923 if (!queued) {
1924 discard:
1925 kfree_skb(skb);
1926 }
1927
1928 return 0;
1929}
1930
1931
1932
1933
1934
1935
1936struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
1937 struct open_request *req)
1938{
1939 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
1940 u32 flg;
1941
1942
1943
1944
1945
1946
1947
1948 flg = *(((u32 *)skb->h.th) + 3);
1949
1950 flg &= __constant_htonl(0x00170000);
1951
1952 if (flg == __constant_htonl(0x00020000)) {
1953 if (!after(TCP_SKB_CB(skb)->seq, req->rcv_isn)) {
1954
1955
1956 req->class->rtx_syn_ack(sk, req);
1957 return NULL;
1958 } else {
1959 return sk;
1960 }
1961 }
1962
1963
1964 if (req->sk) {
1965
1966
1967
1968 sk = req->sk;
1969 } else {
1970
1971
1972
1973
1974
1975
1976
1977 if (!between(TCP_SKB_CB(skb)->ack_seq, req->snt_isn, req->snt_isn+1) ||
1978 !between(TCP_SKB_CB(skb)->seq, req->rcv_isn,
1979 req->rcv_isn+1+req->rcv_wnd)) {
1980 req->class->send_reset(skb);
1981 return NULL;
1982 }
1983
1984 sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
1985 tcp_dec_slow_timer(TCP_SLT_SYNACK);
1986 if (sk == NULL)
1987 return NULL;
1988
1989 req->expires = 0UL;
1990 req->sk = sk;
1991 }
1992 skb_orphan(skb);
1993 skb_set_owner_r(skb, sk);
1994 return sk;
1995}
1996
1997
1998
1999
2000
2001
2002
2003
2004int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
2005 struct tcphdr *th, unsigned len)
2006{
2007 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
2008 int queued = 0;
2009
2010 switch (sk->state) {
2011 case TCP_CLOSE:
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029 return 1;
2030
2031 case TCP_LISTEN:
2032
2033
2034
2035 if(th->ack)
2036 return 1;
2037
2038 if(th->syn) {
2039 if(tp->af_specific->conn_request(sk, skb, 0) < 0)
2040 return 1;
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058 goto discard;
2059 }
2060
2061 goto discard;
2062 break;
2063
2064 case TCP_SYN_SENT:
2065
2066
2067
2068
2069
2070 if(th->ack) {
2071 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
2072
2073
2074 if(!tcp_ack(sk,th, TCP_SKB_CB(skb)->seq,
2075 TCP_SKB_CB(skb)->ack_seq, len))
2076 return 1;
2077
2078 if(th->rst) {
2079 tcp_reset(sk);
2080 goto discard;
2081 }
2082
2083 if(!th->syn)
2084 goto discard;
2085
2086
2087
2088
2089 tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1;
2090 tp->rcv_wup = TCP_SKB_CB(skb)->seq+1;
2091
2092
2093
2094
2095 tp->snd_wnd = htons(th->window);
2096 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
2097 tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
2098 tp->fin_seq = TCP_SKB_CB(skb)->seq;
2099
2100 tcp_set_state(sk, TCP_ESTABLISHED);
2101 tcp_parse_options(sk, th, tp, 0);
2102
2103 if (tp->wscale_ok == 0) {
2104 tp->snd_wscale = tp->rcv_wscale = 0;
2105 tp->window_clamp = min(tp->window_clamp,65535);
2106 }
2107
2108 if (tp->tstamp_ok) {
2109 tp->tcp_header_len =
2110 sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
2111 } else
2112 tp->tcp_header_len = sizeof(struct tcphdr);
2113 if (tp->saw_tstamp) {
2114 tp->ts_recent = tp->rcv_tsval;
2115 tp->ts_recent_stamp = jiffies;
2116 }
2117
2118
2119 tcp_send_ack(sk);
2120
2121 sk->dport = th->source;
2122 tp->copied_seq = tp->rcv_nxt;
2123
2124 if(!sk->dead) {
2125 sk->state_change(sk);
2126 sock_wake_async(sk->socket, 0);
2127 }
2128 } else {
2129 if(th->syn && !th->rst) {
2130
2131
2132
2133
2134
2135 tcp_set_state(sk, TCP_SYN_RECV);
2136 tcp_parse_options(sk, th, tp, 0);
2137 if (tp->saw_tstamp) {
2138 tp->ts_recent = tp->rcv_tsval;
2139 tp->ts_recent_stamp = jiffies;
2140 }
2141
2142 tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
2143 tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
2144
2145
2146
2147
2148 tp->snd_wnd = htons(th->window);
2149 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
2150
2151 tcp_send_synack(sk);
2152 } else
2153 break;
2154 }
2155
2156
2157
2158
2159 tcp_sync_mss(sk, tp->pmtu_cookie);
2160 tp->rcv_mss = tp->mss_cache;
2161
2162 if (sk->state == TCP_SYN_RECV)
2163 goto discard;
2164
2165 goto step6;
2166 }
2167
2168
2169
2170
2171
2172
2173 if (tcp_fast_parse_options(sk, th, tp)) {
2174
2175
2176
2177
2178 if (tp->saw_tstamp) {
2179 if (tcp_paws_discard(tp, th, len)) {
2180 tcp_statistics.TcpInErrs++;
2181 if (!th->rst) {
2182 tcp_send_ack(sk);
2183 goto discard;
2184 }
2185 }
2186 tcp_replace_ts_recent(sk, tp,
2187 TCP_SKB_CB(skb)->seq,
2188 TCP_SKB_CB(skb)->end_seq);
2189 }
2190 }
2191
2192
2193 if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
2194 if (!th->rst) {
2195 tcp_send_ack(sk);
2196 goto discard;
2197 }
2198 }
2199
2200
2201 if(th->rst) {
2202 tcp_reset(sk);
2203 goto discard;
2204 }
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224 if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
2225 tcp_reset(sk);
2226 return 1;
2227 }
2228
2229
2230 if (th->ack) {
2231 int acceptable = tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
2232 TCP_SKB_CB(skb)->ack_seq, len);
2233
2234 switch(sk->state) {
2235 case TCP_SYN_RECV:
2236 if (acceptable) {
2237 tcp_set_state(sk, TCP_ESTABLISHED);
2238 sk->dport = th->source;
2239 tp->copied_seq = tp->rcv_nxt;
2240
2241 if(!sk->dead)
2242 sk->state_change(sk);
2243
2244 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
2245 tp->snd_wnd = htons(th->window) << tp->snd_wscale;
2246 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
2247 tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
2248
2249 } else {
2250 SOCK_DEBUG(sk, "bad ack\n");
2251 return 1;
2252 }
2253 break;
2254
2255 case TCP_FIN_WAIT1:
2256 if (tp->snd_una == tp->write_seq) {
2257 sk->shutdown |= SEND_SHUTDOWN;
2258 tcp_set_state(sk, TCP_FIN_WAIT2);
2259 if (!sk->dead)
2260 sk->state_change(sk);
2261 else
2262 tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
2263 }
2264 break;
2265
2266 case TCP_CLOSING:
2267 if (tp->snd_una == tp->write_seq) {
2268 tcp_time_wait(sk);
2269 goto discard;
2270 }
2271 break;
2272
2273 case TCP_LAST_ACK:
2274 if (tp->snd_una == tp->write_seq) {
2275 sk->shutdown = SHUTDOWN_MASK;
2276 tcp_set_state(sk,TCP_CLOSE);
2277 if (!sk->dead)
2278 sk->state_change(sk);
2279 goto discard;
2280 }
2281 break;
2282 }
2283 } else
2284 goto discard;
2285
2286step6:
2287
2288 tcp_urg(sk, th, len);
2289
2290
2291 switch (sk->state) {
2292 case TCP_CLOSE_WAIT:
2293 case TCP_CLOSING:
2294 if (!before(TCP_SKB_CB(skb)->seq, tp->fin_seq))
2295 break;
2296
2297 case TCP_FIN_WAIT1:
2298 case TCP_FIN_WAIT2:
2299
2300
2301
2302
2303 if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) {
2304 if (after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
2305 tcp_reset(sk);
2306 return 1;
2307 }
2308 }
2309
2310 case TCP_ESTABLISHED:
2311 queued = tcp_data(skb, sk, len);
2312
2313
2314
2315
2316 tcp_measure_rcv_mss(sk, skb);
2317 break;
2318 }
2319
2320 tcp_data_snd_check(sk);
2321 tcp_ack_snd_check(sk);
2322
2323 if (!queued) {
2324discard:
2325 kfree_skb(skb);
2326 }
2327 return 0;
2328}
2329