1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55#include <linux/config.h>
56
57#include <linux/types.h>
58#include <linux/fcntl.h>
59#include <linux/module.h>
60#include <linux/random.h>
61#include <linux/cache.h>
62#include <linux/jhash.h>
63#include <linux/init.h>
64#include <linux/times.h>
65
66#include <net/icmp.h>
67#include <net/tcp.h>
68#include <net/ipv6.h>
69#include <net/inet_common.h>
70#include <net/xfrm.h>
71
72#include <linux/inet.h>
73#include <linux/ipv6.h>
74#include <linux/stddef.h>
75#include <linux/proc_fs.h>
76#include <linux/seq_file.h>
77
/* Defined outside this file. */
extern int sysctl_ip_dynaddr;

/*
 * Gates the TIME-WAIT takeover in __tcp_v4_check_established() when that
 * function is called with a non-NULL twp argument.
 */
int sysctl_tcp_tw_reuse;
int sysctl_tcp_low_latency;

#define ICMP_MIN_LENGTH	8

/*
 * tcp_socket->sk is the socket handed to ip_send_reply() by
 * tcp_v4_send_reset() and tcp_v4_send_ack().
 */
static struct socket *tcp_socket;

/* Forward declaration; the definition appears further down in this file. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb);
90
91struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
92 .__tcp_lhash_lock = RW_LOCK_UNLOCKED,
93 .__tcp_lhash_users = ATOMIC_INIT(0),
94 .__tcp_lhash_wait
95 = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
96 .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED
97};
98
99
100
101
102
103
/*
 * [0] is the lowest and [1] the highest local port considered by the
 * automatic port searches in tcp_v4_get_port() and tcp_v4_hash_connect().
 */
int sysctl_local_port_range[2] = { 1024, 4999 };

/*
 * Last port tried by those searches; both pre-increment it before use and
 * update it while holding tcp_portalloc_lock.
 */
int tcp_port_rover = 1024 - 1;
106
107static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
108 __u32 faddr, __u16 fport)
109{
110 int h = (laddr ^ lport) ^ (faddr ^ fport);
111 h ^= h >> 16;
112 h ^= h >> 8;
113 return h & (tcp_ehash_size - 1);
114}
115
116static __inline__ int tcp_sk_hashfn(struct sock *sk)
117{
118 struct inet_opt *inet = inet_sk(sk);
119 __u32 laddr = inet->rcv_saddr;
120 __u16 lport = inet->num;
121 __u32 faddr = inet->daddr;
122 __u16 fport = inet->dport;
123
124 return tcp_hashfn(laddr, lport, faddr, fport);
125}
126
127
128
129
130struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
131 unsigned short snum)
132{
133 struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
134 SLAB_ATOMIC);
135 if (tb) {
136 tb->port = snum;
137 tb->fastreuse = 0;
138 INIT_HLIST_HEAD(&tb->owners);
139 hlist_add_head(&tb->node, &head->chain);
140 }
141 return tb;
142}
143
144
145void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
146{
147 if (hlist_empty(&tb->owners)) {
148 __hlist_del(&tb->node);
149 kmem_cache_free(tcp_bucket_cachep, tb);
150 }
151}
152
153
154static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
155{
156 struct tcp_bind_hashbucket *head =
157 &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
158 struct tcp_bind_bucket *tb;
159
160 spin_lock(&head->lock);
161 tb = tcp_sk(sk)->bind_hash;
162 sk_add_bind_node(child, &tb->owners);
163 tcp_sk(child)->bind_hash = tb;
164 spin_unlock(&head->lock);
165}
166
/* Wrapper around __tcp_inherit_port() that runs it with bottom halves disabled. */
inline void tcp_inherit_port(struct sock *sk, struct sock *child)
{
	local_bh_disable();
	__tcp_inherit_port(sk, child);
	local_bh_enable();
}
173
174void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
175 unsigned short snum)
176{
177 inet_sk(sk)->num = snum;
178 sk_add_bind_node(sk, &tb->owners);
179 tcp_sk(sk)->bind_hash = tb;
180}
181
182static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
183{
184 const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
185 struct sock *sk2;
186 struct hlist_node *node;
187 int reuse = sk->sk_reuse;
188
189 sk_for_each_bound(sk2, node, &tb->owners) {
190 if (sk != sk2 &&
191 !tcp_v6_ipv6only(sk2) &&
192 (!sk->sk_bound_dev_if ||
193 !sk2->sk_bound_dev_if ||
194 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
195 if (!reuse || !sk2->sk_reuse ||
196 sk2->sk_state == TCP_LISTEN) {
197 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
198 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
199 sk2_rcv_saddr == sk_rcv_saddr)
200 break;
201 }
202 }
203 }
204 return node != NULL;
205}
206
207
208
209
210static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
211{
212 struct tcp_bind_hashbucket *head;
213 struct hlist_node *node;
214 struct tcp_bind_bucket *tb;
215 int ret;
216
217 local_bh_disable();
218 if (!snum) {
219 int low = sysctl_local_port_range[0];
220 int high = sysctl_local_port_range[1];
221 int remaining = (high - low) + 1;
222 int rover;
223
224 spin_lock(&tcp_portalloc_lock);
225 rover = tcp_port_rover;
226 do {
227 rover++;
228 if (rover < low || rover > high)
229 rover = low;
230 head = &tcp_bhash[tcp_bhashfn(rover)];
231 spin_lock(&head->lock);
232 tb_for_each(tb, node, &head->chain)
233 if (tb->port == rover)
234 goto next;
235 break;
236 next:
237 spin_unlock(&head->lock);
238 } while (--remaining > 0);
239 tcp_port_rover = rover;
240 spin_unlock(&tcp_portalloc_lock);
241
242
243 ret = 1;
244 if (remaining <= 0)
245 goto fail;
246
247
248
249
250 snum = rover;
251 } else {
252 head = &tcp_bhash[tcp_bhashfn(snum)];
253 spin_lock(&head->lock);
254 tb_for_each(tb, node, &head->chain)
255 if (tb->port == snum)
256 goto tb_found;
257 }
258 tb = NULL;
259 goto tb_not_found;
260tb_found:
261 if (!hlist_empty(&tb->owners)) {
262 if (sk->sk_reuse > 1)
263 goto success;
264 if (tb->fastreuse > 0 &&
265 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
266 goto success;
267 } else {
268 ret = 1;
269 if (tcp_bind_conflict(sk, tb))
270 goto fail_unlock;
271 }
272 }
273tb_not_found:
274 ret = 1;
275 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
276 goto fail_unlock;
277 if (hlist_empty(&tb->owners)) {
278 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
279 tb->fastreuse = 1;
280 else
281 tb->fastreuse = 0;
282 } else if (tb->fastreuse &&
283 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
284 tb->fastreuse = 0;
285success:
286 if (!tcp_sk(sk)->bind_hash)
287 tcp_bind_hash(sk, tb, snum);
288 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
289 ret = 0;
290
291fail_unlock:
292 spin_unlock(&head->lock);
293fail:
294 local_bh_enable();
295 return ret;
296}
297
298
299
300
301static void __tcp_put_port(struct sock *sk)
302{
303 struct inet_opt *inet = inet_sk(sk);
304 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
305 struct tcp_bind_bucket *tb;
306
307 spin_lock(&head->lock);
308 tb = tcp_sk(sk)->bind_hash;
309 __sk_del_bind_node(sk);
310 tcp_sk(sk)->bind_hash = NULL;
311 inet->num = 0;
312 tcp_bucket_destroy(tb);
313 spin_unlock(&head->lock);
314}
315
/* Wrapper around __tcp_put_port() that runs it with bottom halves disabled. */
inline void tcp_put_port(struct sock *sk)
{
	local_bh_disable();
	__tcp_put_port(sk);
	local_bh_enable();
}
322
323
324
325
326
327
328
329
330void tcp_listen_wlock(void)
331{
332 write_lock(&tcp_lhash_lock);
333
334 if (atomic_read(&tcp_lhash_users)) {
335 DEFINE_WAIT(wait);
336
337 for (;;) {
338 prepare_to_wait_exclusive(&tcp_lhash_wait,
339 &wait, TASK_UNINTERRUPTIBLE);
340 if (!atomic_read(&tcp_lhash_users))
341 break;
342 write_unlock_bh(&tcp_lhash_lock);
343 schedule();
344 write_lock_bh(&tcp_lhash_lock);
345 }
346
347 finish_wait(&tcp_lhash_wait, &wait);
348 }
349}
350
351static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
352{
353 struct hlist_head *list;
354 rwlock_t *lock;
355
356 BUG_TRAP(sk_unhashed(sk));
357 if (listen_possible && sk->sk_state == TCP_LISTEN) {
358 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
359 lock = &tcp_lhash_lock;
360 tcp_listen_wlock();
361 } else {
362 list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
363 lock = &tcp_ehash[sk->sk_hashent].lock;
364 write_lock(lock);
365 }
366 __sk_add_node(sk, list);
367 sock_prot_inc_use(sk->sk_prot);
368 write_unlock(lock);
369 if (listen_possible && sk->sk_state == TCP_LISTEN)
370 wake_up(&tcp_lhash_wait);
371}
372
373static void tcp_v4_hash(struct sock *sk)
374{
375 if (sk->sk_state != TCP_CLOSE) {
376 local_bh_disable();
377 __tcp_v4_hash(sk, 1);
378 local_bh_enable();
379 }
380}
381
382void tcp_unhash(struct sock *sk)
383{
384 rwlock_t *lock;
385
386 if (sk_unhashed(sk))
387 goto ende;
388
389 if (sk->sk_state == TCP_LISTEN) {
390 local_bh_disable();
391 tcp_listen_wlock();
392 lock = &tcp_lhash_lock;
393 } else {
394 struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
395 lock = &head->lock;
396 write_lock_bh(&head->lock);
397 }
398
399 if (__sk_del_node_init(sk))
400 sock_prot_dec_use(sk->sk_prot);
401 write_unlock_bh(lock);
402
403 ende:
404 if (sk->sk_state == TCP_LISTEN)
405 wake_up(&tcp_lhash_wait);
406}
407
408
409
410
411
412
413
414static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
415 unsigned short hnum, int dif)
416{
417 struct sock *result = NULL, *sk;
418 struct hlist_node *node;
419 int score, hiscore;
420
421 hiscore=-1;
422 sk_for_each(sk, node, head) {
423 struct inet_opt *inet = inet_sk(sk);
424
425 if (inet->num == hnum && !ipv6_only_sock(sk)) {
426 __u32 rcv_saddr = inet->rcv_saddr;
427
428 score = (sk->sk_family == PF_INET ? 1 : 0);
429 if (rcv_saddr) {
430 if (rcv_saddr != daddr)
431 continue;
432 score+=2;
433 }
434 if (sk->sk_bound_dev_if) {
435 if (sk->sk_bound_dev_if != dif)
436 continue;
437 score+=2;
438 }
439 if (score == 5)
440 return sk;
441 if (score > hiscore) {
442 hiscore = score;
443 result = sk;
444 }
445 }
446 }
447 return result;
448}
449
450
451inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum,
452 int dif)
453{
454 struct sock *sk = NULL;
455 struct hlist_head *head;
456
457 read_lock(&tcp_lhash_lock);
458 head = &tcp_listening_hash[tcp_lhashfn(hnum)];
459 if (!hlist_empty(head)) {
460 struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
461
462 if (inet->num == hnum && !sk->sk_node.next &&
463 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
464 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
465 !sk->sk_bound_dev_if)
466 goto sherry_cache;
467 sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
468 }
469 if (sk) {
470sherry_cache:
471 sock_hold(sk);
472 }
473 read_unlock(&tcp_lhash_lock);
474 return sk;
475}
476
477
478
479
480
481
482
483static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
484 u32 daddr, u16 hnum,
485 int dif)
486{
487 struct tcp_ehash_bucket *head;
488 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
489 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
490 struct sock *sk;
491 struct hlist_node *node;
492
493
494
495 int hash = tcp_hashfn(daddr, hnum, saddr, sport);
496 head = &tcp_ehash[hash];
497 read_lock(&head->lock);
498 sk_for_each(sk, node, &head->chain) {
499 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
500 goto hit;
501 }
502
503
504 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
505 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
506 goto hit;
507 }
508 sk = NULL;
509out:
510 read_unlock(&head->lock);
511 return sk;
512hit:
513 sock_hold(sk);
514 goto out;
515}
516
517static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
518 u32 daddr, u16 hnum, int dif)
519{
520 struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
521 daddr, hnum, dif);
522
523 return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
524}
525
526inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
527 u16 dport, int dif)
528{
529 struct sock *sk;
530
531 local_bh_disable();
532 sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
533 local_bh_enable();
534
535 return sk;
536}
537
538static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
539{
540 return secure_tcp_sequence_number(skb->nh.iph->daddr,
541 skb->nh.iph->saddr,
542 skb->h.th->dest,
543 skb->h.th->source);
544}
545
546
547static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
548 struct tcp_tw_bucket **twp)
549{
550 struct inet_opt *inet = inet_sk(sk);
551 u32 daddr = inet->rcv_saddr;
552 u32 saddr = inet->daddr;
553 int dif = sk->sk_bound_dev_if;
554 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
555 __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
556 int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
557 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
558 struct sock *sk2;
559 struct hlist_node *node;
560 struct tcp_tw_bucket *tw;
561
562 write_lock(&head->lock);
563
564
565 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
566 tw = (struct tcp_tw_bucket *)sk2;
567
568 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
569 struct tcp_opt *tp = tcp_sk(sk);
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585 if (tw->tw_ts_recent_stamp &&
586 (!twp || (sysctl_tcp_tw_reuse &&
587 xtime.tv_sec -
588 tw->tw_ts_recent_stamp > 1))) {
589 if ((tp->write_seq =
590 tw->tw_snd_nxt + 65535 + 2) == 0)
591 tp->write_seq = 1;
592 tp->ts_recent = tw->tw_ts_recent;
593 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
594 sock_hold(sk2);
595 goto unique;
596 } else
597 goto not_unique;
598 }
599 }
600 tw = NULL;
601
602
603 sk_for_each(sk2, node, &head->chain) {
604 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
605 goto not_unique;
606 }
607
608unique:
609
610
611 inet->num = lport;
612 inet->sport = htons(lport);
613 sk->sk_hashent = hash;
614 BUG_TRAP(sk_unhashed(sk));
615 __sk_add_node(sk, &head->chain);
616 sock_prot_inc_use(sk->sk_prot);
617 write_unlock(&head->lock);
618
619 if (twp) {
620 *twp = tw;
621 NET_INC_STATS_BH(TimeWaitRecycled);
622 } else if (tw) {
623
624 tcp_tw_deschedule(tw);
625 NET_INC_STATS_BH(TimeWaitRecycled);
626
627 tcp_tw_put(tw);
628 }
629
630 return 0;
631
632not_unique:
633 write_unlock(&head->lock);
634 return -EADDRNOTAVAIL;
635}
636
637
638
639
640static int tcp_v4_hash_connect(struct sock *sk)
641{
642 unsigned short snum = inet_sk(sk)->num;
643 struct tcp_bind_hashbucket *head;
644 struct tcp_bind_bucket *tb;
645 int ret;
646
647 if (!snum) {
648 int rover;
649 int low = sysctl_local_port_range[0];
650 int high = sysctl_local_port_range[1];
651 int remaining = (high - low) + 1;
652 struct hlist_node *node;
653 struct tcp_tw_bucket *tw = NULL;
654
655 local_bh_disable();
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670 spin_lock(&tcp_portalloc_lock);
671 rover = tcp_port_rover;
672
673 do {
674 rover++;
675 if ((rover < low) || (rover > high))
676 rover = low;
677 head = &tcp_bhash[tcp_bhashfn(rover)];
678 spin_lock(&head->lock);
679
680
681
682
683
684 tb_for_each(tb, node, &head->chain) {
685 if (tb->port == rover) {
686 BUG_TRAP(!hlist_empty(&tb->owners));
687 if (tb->fastreuse >= 0)
688 goto next_port;
689 if (!__tcp_v4_check_established(sk,
690 rover,
691 &tw))
692 goto ok;
693 goto next_port;
694 }
695 }
696
697 tb = tcp_bucket_create(head, rover);
698 if (!tb) {
699 spin_unlock(&head->lock);
700 break;
701 }
702 tb->fastreuse = -1;
703 goto ok;
704
705 next_port:
706 spin_unlock(&head->lock);
707 } while (--remaining > 0);
708 tcp_port_rover = rover;
709 spin_unlock(&tcp_portalloc_lock);
710
711 local_bh_enable();
712
713 return -EADDRNOTAVAIL;
714
715ok:
716
717 tcp_port_rover = rover;
718 spin_unlock(&tcp_portalloc_lock);
719
720 tcp_bind_hash(sk, tb, rover);
721 if (sk_unhashed(sk)) {
722 inet_sk(sk)->sport = htons(rover);
723 __tcp_v4_hash(sk, 0);
724 }
725 spin_unlock(&head->lock);
726
727 if (tw) {
728 tcp_tw_deschedule(tw);
729 tcp_tw_put(tw);
730 }
731
732 ret = 0;
733 goto out;
734 }
735
736 head = &tcp_bhash[tcp_bhashfn(snum)];
737 tb = tcp_sk(sk)->bind_hash;
738 spin_lock_bh(&head->lock);
739 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
740 __tcp_v4_hash(sk, 0);
741 spin_unlock_bh(&head->lock);
742 return 0;
743 } else {
744 spin_unlock(&head->lock);
745
746 ret = __tcp_v4_check_established(sk, snum, NULL);
747out:
748 local_bh_enable();
749 return ret;
750 }
751}
752
753
754int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
755{
756 struct inet_opt *inet = inet_sk(sk);
757 struct tcp_opt *tp = tcp_sk(sk);
758 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
759 struct rtable *rt;
760 u32 daddr, nexthop;
761 int tmp;
762 int err;
763
764 if (addr_len < sizeof(struct sockaddr_in))
765 return -EINVAL;
766
767 if (usin->sin_family != AF_INET)
768 return -EAFNOSUPPORT;
769
770 nexthop = daddr = usin->sin_addr.s_addr;
771 if (inet->opt && inet->opt->srr) {
772 if (!daddr)
773 return -EINVAL;
774 nexthop = inet->opt->faddr;
775 }
776
777 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
778 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
779 IPPROTO_TCP,
780 inet->sport, usin->sin_port, sk);
781 if (tmp < 0)
782 return tmp;
783
784 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
785 ip_rt_put(rt);
786 return -ENETUNREACH;
787 }
788
789 if (!inet->opt || !inet->opt->srr)
790 daddr = rt->rt_dst;
791
792 if (!inet->saddr)
793 inet->saddr = rt->rt_src;
794 inet->rcv_saddr = inet->saddr;
795
796 if (tp->ts_recent_stamp && inet->daddr != daddr) {
797
798 tp->ts_recent = 0;
799 tp->ts_recent_stamp = 0;
800 tp->write_seq = 0;
801 }
802
803 if (sysctl_tcp_tw_recycle &&
804 !tp->ts_recent_stamp && rt->rt_dst == daddr) {
805 struct inet_peer *peer = rt_get_peer(rt);
806
807
808
809
810
811
812 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
813 tp->ts_recent_stamp = peer->tcp_ts_stamp;
814 tp->ts_recent = peer->tcp_ts;
815 }
816 }
817
818 inet->dport = usin->sin_port;
819 inet->daddr = daddr;
820
821 tp->ext_header_len = 0;
822 if (inet->opt)
823 tp->ext_header_len = inet->opt->optlen;
824
825 tp->mss_clamp = 536;
826
827
828
829
830
831
832 tcp_set_state(sk, TCP_SYN_SENT);
833 err = tcp_v4_hash_connect(sk);
834 if (err)
835 goto failure;
836
837 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
838 if (err)
839 goto failure;
840
841
842 __sk_dst_set(sk, &rt->u.dst);
843 tcp_v4_setup_caps(sk, &rt->u.dst);
844 tp->ext2_header_len = rt->u.dst.header_len;
845
846 if (!tp->write_seq)
847 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
848 inet->daddr,
849 inet->sport,
850 usin->sin_port);
851
852 inet->id = tp->write_seq ^ jiffies;
853
854 err = tcp_connect(sk);
855 rt = NULL;
856 if (err)
857 goto failure;
858
859 return 0;
860
861failure:
862
863 tcp_set_state(sk, TCP_CLOSE);
864 ip_rt_put(rt);
865 sk->sk_route_caps = 0;
866 inet->dport = 0;
867 return err;
868}
869
870static __inline__ int tcp_v4_iif(struct sk_buff *skb)
871{
872 return ((struct rtable *)skb->dst)->rt_iif;
873}
874
875static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
876{
877 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
878}
879
880static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
881 struct open_request ***prevp,
882 __u16 rport,
883 __u32 raddr, __u32 laddr)
884{
885 struct tcp_listen_opt *lopt = tp->listen_opt;
886 struct open_request *req, **prev;
887
888 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
889 (req = *prev) != NULL;
890 prev = &req->dl_next) {
891 if (req->rmt_port == rport &&
892 req->af.v4_req.rmt_addr == raddr &&
893 req->af.v4_req.loc_addr == laddr &&
894 TCP_INET_FAMILY(req->class->family)) {
895 BUG_TRAP(!req->sk);
896 *prevp = prev;
897 break;
898 }
899 }
900
901 return req;
902}
903
904static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
905{
906 struct tcp_opt *tp = tcp_sk(sk);
907 struct tcp_listen_opt *lopt = tp->listen_opt;
908 u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
909
910 req->expires = jiffies + TCP_TIMEOUT_INIT;
911 req->retrans = 0;
912 req->sk = NULL;
913 req->dl_next = lopt->syn_table[h];
914
915 write_lock(&tp->syn_wait_lock);
916 lopt->syn_table[h] = req;
917 write_unlock(&tp->syn_wait_lock);
918
919 tcp_synq_added(sk);
920}
921
922
923
924
925
926static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
927 u32 mtu)
928{
929 struct dst_entry *dst;
930 struct inet_opt *inet = inet_sk(sk);
931 struct tcp_opt *tp = tcp_sk(sk);
932
933
934
935
936
937 if (sk->sk_state == TCP_LISTEN)
938 return;
939
940
941
942
943
944
945
946 if ((dst = __sk_dst_check(sk, 0)) == NULL)
947 return;
948
949 dst->ops->update_pmtu(dst, mtu);
950
951
952
953
954 if (mtu < dst_pmtu(dst) && ip_dont_fragment(sk, dst))
955 sk->sk_err_soft = EMSGSIZE;
956
957 mtu = dst_pmtu(dst);
958
959 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
960 tp->pmtu_cookie > mtu) {
961 tcp_sync_mss(sk, mtu);
962
963
964
965
966
967
968 tcp_simple_retransmit(sk);
969 }
970}
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988void tcp_v4_err(struct sk_buff *skb, u32 info)
989{
990 struct iphdr *iph = (struct iphdr *)skb->data;
991 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
992 struct tcp_opt *tp;
993 struct inet_opt *inet;
994 int type = skb->h.icmph->type;
995 int code = skb->h.icmph->code;
996 struct sock *sk;
997 __u32 seq;
998 int err;
999
1000 if (skb->len < (iph->ihl << 2) + 8) {
1001 ICMP_INC_STATS_BH(IcmpInErrors);
1002 return;
1003 }
1004
1005 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
1006 th->source, tcp_v4_iif(skb));
1007 if (!sk) {
1008 ICMP_INC_STATS_BH(IcmpInErrors);
1009 return;
1010 }
1011 if (sk->sk_state == TCP_TIME_WAIT) {
1012 tcp_tw_put((struct tcp_tw_bucket *)sk);
1013 return;
1014 }
1015
1016 bh_lock_sock(sk);
1017
1018
1019
1020 if (sock_owned_by_user(sk))
1021 NET_INC_STATS_BH(LockDroppedIcmps);
1022
1023 if (sk->sk_state == TCP_CLOSE)
1024 goto out;
1025
1026 tp = tcp_sk(sk);
1027 seq = ntohl(th->seq);
1028 if (sk->sk_state != TCP_LISTEN &&
1029 !between(seq, tp->snd_una, tp->snd_nxt)) {
1030 NET_INC_STATS(OutOfWindowIcmps);
1031 goto out;
1032 }
1033
1034 switch (type) {
1035 case ICMP_SOURCE_QUENCH:
1036
1037
1038
1039 if (!sock_owned_by_user(sk))
1040 tcp_enter_cwr(tp);
1041 goto out;
1042 case ICMP_PARAMETERPROB:
1043 err = EPROTO;
1044 break;
1045 case ICMP_DEST_UNREACH:
1046 if (code > NR_ICMP_UNREACH)
1047 goto out;
1048
1049 if (code == ICMP_FRAG_NEEDED) {
1050 if (!sock_owned_by_user(sk))
1051 do_pmtu_discovery(sk, iph, info);
1052 goto out;
1053 }
1054
1055 err = icmp_err_convert[code].errno;
1056 break;
1057 case ICMP_TIME_EXCEEDED:
1058 err = EHOSTUNREACH;
1059 break;
1060 default:
1061 goto out;
1062 }
1063
1064 switch (sk->sk_state) {
1065 struct open_request *req, **prev;
1066 case TCP_LISTEN:
1067 if (sock_owned_by_user(sk))
1068 goto out;
1069
1070 req = tcp_v4_search_req(tp, &prev, th->dest,
1071 iph->daddr, iph->saddr);
1072 if (!req)
1073 goto out;
1074
1075
1076
1077
1078 BUG_TRAP(!req->sk);
1079
1080 if (seq != req->snt_isn) {
1081 NET_INC_STATS_BH(OutOfWindowIcmps);
1082 goto out;
1083 }
1084
1085
1086
1087
1088
1089
1090
1091 tcp_synq_drop(sk, req, prev);
1092 goto out;
1093
1094 case TCP_SYN_SENT:
1095 case TCP_SYN_RECV:
1096
1097
1098 if (!sock_owned_by_user(sk)) {
1099 TCP_INC_STATS_BH(TcpAttemptFails);
1100 sk->sk_err = err;
1101
1102 sk->sk_error_report(sk);
1103
1104 tcp_done(sk);
1105 } else {
1106 sk->sk_err_soft = err;
1107 }
1108 goto out;
1109 }
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127 inet = inet_sk(sk);
1128 if (!sock_owned_by_user(sk) && inet->recverr) {
1129 sk->sk_err = err;
1130 sk->sk_error_report(sk);
1131 } else {
1132 sk->sk_err_soft = err;
1133 }
1134
1135out:
1136 bh_unlock_sock(sk);
1137 sock_put(sk);
1138}
1139
1140
1141void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
1142 struct sk_buff *skb)
1143{
1144 struct inet_opt *inet = inet_sk(sk);
1145
1146 if (skb->ip_summed == CHECKSUM_HW) {
1147 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
1148 skb->csum = offsetof(struct tcphdr, check);
1149 } else {
1150 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
1151 csum_partial((char *)th,
1152 th->doff << 2,
1153 skb->csum));
1154 }
1155}
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170static void tcp_v4_send_reset(struct sk_buff *skb)
1171{
1172 struct tcphdr *th = skb->h.th;
1173 struct tcphdr rth;
1174 struct ip_reply_arg arg;
1175
1176
1177 if (th->rst)
1178 return;
1179
1180 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
1181 return;
1182
1183
1184 memset(&rth, 0, sizeof(struct tcphdr));
1185 rth.dest = th->source;
1186 rth.source = th->dest;
1187 rth.doff = sizeof(struct tcphdr) / 4;
1188 rth.rst = 1;
1189
1190 if (th->ack) {
1191 rth.seq = th->ack_seq;
1192 } else {
1193 rth.ack = 1;
1194 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
1195 skb->len - (th->doff << 2));
1196 }
1197
1198 memset(&arg, 0, sizeof arg);
1199 arg.iov[0].iov_base = (unsigned char *)&rth;
1200 arg.iov[0].iov_len = sizeof rth;
1201 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1202 skb->nh.iph->saddr,
1203 sizeof(struct tcphdr), IPPROTO_TCP, 0);
1204 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1205
1206 ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1207
1208 TCP_INC_STATS_BH(TcpOutSegs);
1209 TCP_INC_STATS_BH(TcpOutRsts);
1210}
1211
1212
1213
1214
1215
1216static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1217 u32 win, u32 ts)
1218{
1219 struct tcphdr *th = skb->h.th;
1220 struct {
1221 struct tcphdr th;
1222 u32 tsopt[3];
1223 } rep;
1224 struct ip_reply_arg arg;
1225
1226 memset(&rep.th, 0, sizeof(struct tcphdr));
1227 memset(&arg, 0, sizeof arg);
1228
1229 arg.iov[0].iov_base = (unsigned char *)&rep;
1230 arg.iov[0].iov_len = sizeof(rep.th);
1231 if (ts) {
1232 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1233 (TCPOPT_TIMESTAMP << 8) |
1234 TCPOLEN_TIMESTAMP);
1235 rep.tsopt[1] = htonl(tcp_time_stamp);
1236 rep.tsopt[2] = htonl(ts);
1237 arg.iov[0].iov_len = sizeof(rep);
1238 }
1239
1240
1241 rep.th.dest = th->source;
1242 rep.th.source = th->dest;
1243 rep.th.doff = arg.iov[0].iov_len / 4;
1244 rep.th.seq = htonl(seq);
1245 rep.th.ack_seq = htonl(ack);
1246 rep.th.ack = 1;
1247 rep.th.window = htons(win);
1248
1249 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1250 skb->nh.iph->saddr,
1251 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1252 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1253
1254 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
1255
1256 TCP_INC_STATS_BH(TcpOutSegs);
1257}
1258
1259static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1260{
1261 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1262
1263 tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1264 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1265
1266 tcp_tw_put(tw);
1267}
1268
1269static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
1270{
1271 tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
1272 req->ts_recent);
1273}
1274
1275static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1276 struct open_request *req)
1277{
1278 struct rtable *rt;
1279 struct ip_options *opt = req->af.v4_req.opt;
1280 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1281 .nl_u = { .ip4_u =
1282 { .daddr = ((opt && opt->srr) ?
1283 opt->faddr :
1284 req->af.v4_req.rmt_addr),
1285 .saddr = req->af.v4_req.loc_addr,
1286 .tos = RT_CONN_FLAGS(sk) } },
1287 .proto = IPPROTO_TCP,
1288 .uli_u = { .ports =
1289 { .sport = inet_sk(sk)->sport,
1290 .dport = req->rmt_port } } };
1291
1292 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1293 IP_INC_STATS_BH(IpOutNoRoutes);
1294 return NULL;
1295 }
1296 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1297 ip_rt_put(rt);
1298 IP_INC_STATS_BH(IpOutNoRoutes);
1299 return NULL;
1300 }
1301 return &rt->u.dst;
1302}
1303
1304
1305
1306
1307
1308
1309static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1310 struct dst_entry *dst)
1311{
1312 int err = -1;
1313 struct sk_buff * skb;
1314
1315
1316 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1317 goto out;
1318
1319 skb = tcp_make_synack(sk, dst, req);
1320
1321 if (skb) {
1322 struct tcphdr *th = skb->h.th;
1323
1324 th->check = tcp_v4_check(th, skb->len,
1325 req->af.v4_req.loc_addr,
1326 req->af.v4_req.rmt_addr,
1327 csum_partial((char *)th, skb->len,
1328 skb->csum));
1329
1330 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
1331 req->af.v4_req.rmt_addr,
1332 req->af.v4_req.opt);
1333 if (err == NET_XMIT_CN)
1334 err = 0;
1335 }
1336
1337out:
1338 dst_release(dst);
1339 return err;
1340}
1341
1342
1343
1344
1345static void tcp_v4_or_free(struct open_request *req)
1346{
1347 if (req->af.v4_req.opt)
1348 kfree(req->af.v4_req.opt);
1349}
1350
1351static inline void syn_flood_warning(struct sk_buff *skb)
1352{
1353 static unsigned long warntime;
1354
1355 if (time_after(jiffies, (warntime + HZ * 60))) {
1356 warntime = jiffies;
1357 printk(KERN_INFO
1358 "possible SYN flooding on port %d. Sending cookies.\n",
1359 ntohs(skb->h.th->dest));
1360 }
1361}
1362
1363
1364
1365
1366static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1367 struct sk_buff *skb)
1368{
1369 struct ip_options *opt = &(IPCB(skb)->opt);
1370 struct ip_options *dopt = NULL;
1371
1372 if (opt && opt->optlen) {
1373 int opt_size = optlength(opt);
1374 dopt = kmalloc(opt_size, GFP_ATOMIC);
1375 if (dopt) {
1376 if (ip_options_echo(dopt, skb)) {
1377 kfree(dopt);
1378 dopt = NULL;
1379 }
1380 }
1381 }
1382 return dopt;
1383}
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398int sysctl_max_syn_backlog = 256;
1399
1400struct or_calltable or_ipv4 = {
1401 .family = PF_INET,
1402 .rtx_syn_ack = tcp_v4_send_synack,
1403 .send_ack = tcp_v4_or_send_ack,
1404 .destructor = tcp_v4_or_free,
1405 .send_reset = tcp_v4_send_reset,
1406};
1407
1408int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1409{
1410 struct tcp_opt tp;
1411 struct open_request *req;
1412 __u32 saddr = skb->nh.iph->saddr;
1413 __u32 daddr = skb->nh.iph->daddr;
1414 __u32 isn = TCP_SKB_CB(skb)->when;
1415 struct dst_entry *dst = NULL;
1416#ifdef CONFIG_SYN_COOKIES
1417 int want_cookie = 0;
1418#else
1419#define want_cookie 0
1420#endif
1421
1422
1423 if (((struct rtable *)skb->dst)->rt_flags &
1424 (RTCF_BROADCAST | RTCF_MULTICAST))
1425 goto drop;
1426
1427
1428
1429
1430
1431 if (tcp_synq_is_full(sk) && !isn) {
1432#ifdef CONFIG_SYN_COOKIES
1433 if (sysctl_tcp_syncookies) {
1434 want_cookie = 1;
1435 } else
1436#endif
1437 goto drop;
1438 }
1439
1440
1441
1442
1443
1444
1445 if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1446 goto drop;
1447
1448 req = tcp_openreq_alloc();
1449 if (!req)
1450 goto drop;
1451
1452 tcp_clear_options(&tp);
1453 tp.mss_clamp = 536;
1454 tp.user_mss = tcp_sk(sk)->user_mss;
1455
1456 tcp_parse_options(skb, &tp, 0);
1457
1458 if (want_cookie) {
1459 tcp_clear_options(&tp);
1460 tp.saw_tstamp = 0;
1461 }
1462
1463 if (tp.saw_tstamp && !tp.rcv_tsval) {
1464
1465
1466
1467
1468
1469 tp.saw_tstamp = 0;
1470 tp.tstamp_ok = 0;
1471 }
1472 tp.tstamp_ok = tp.saw_tstamp;
1473
1474 tcp_openreq_init(req, &tp, skb);
1475
1476 req->af.v4_req.loc_addr = daddr;
1477 req->af.v4_req.rmt_addr = saddr;
1478 req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
1479 req->class = &or_ipv4;
1480 if (!want_cookie)
1481 TCP_ECN_create_request(req, skb->h.th);
1482
1483 if (want_cookie) {
1484#ifdef CONFIG_SYN_COOKIES
1485 syn_flood_warning(skb);
1486#endif
1487 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1488 } else if (!isn) {
1489 struct inet_peer *peer = NULL;
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500 if (tp.saw_tstamp &&
1501 sysctl_tcp_tw_recycle &&
1502 (dst = tcp_v4_route_req(sk, req)) != NULL &&
1503 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1504 peer->v4daddr == saddr) {
1505 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1506 (s32)(peer->tcp_ts - req->ts_recent) >
1507 TCP_PAWS_WINDOW) {
1508 NET_INC_STATS_BH(PAWSPassiveRejected);
1509 dst_release(dst);
1510 goto drop_and_free;
1511 }
1512 }
1513
1514 else if (!sysctl_tcp_syncookies &&
1515 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
1516 (sysctl_max_syn_backlog >> 2)) &&
1517 (!peer || !peer->tcp_ts_stamp) &&
1518 (!dst || !dst_metric(dst, RTAX_RTT))) {
1519
1520
1521
1522
1523
1524
1525
1526 NETDEBUG(if (net_ratelimit()) \
1527 printk(KERN_DEBUG "TCP: drop open "
1528 "request from %u.%u."
1529 "%u.%u/%u\n", \
1530 NIPQUAD(saddr),
1531 ntohs(skb->h.th->source)));
1532 dst_release(dst);
1533 goto drop_and_free;
1534 }
1535
1536 isn = tcp_v4_init_sequence(sk, skb);
1537 }
1538 req->snt_isn = isn;
1539
1540 if (tcp_v4_send_synack(sk, req, dst))
1541 goto drop_and_free;
1542
1543 if (want_cookie) {
1544 tcp_openreq_free(req);
1545 } else {
1546 tcp_v4_synq_add(sk, req);
1547 }
1548 return 0;
1549
1550drop_and_free:
1551 tcp_openreq_free(req);
1552drop:
1553 TCP_INC_STATS_BH(TcpAttemptFails);
1554 return 0;
1555}
1556
1557
1558
1559
1560
1561
1562struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1563 struct open_request *req,
1564 struct dst_entry *dst)
1565{
1566 struct inet_opt *newinet;
1567 struct tcp_opt *newtp;
1568 struct sock *newsk;
1569
1570 if (tcp_acceptq_is_full(sk))
1571 goto exit_overflow;
1572
1573 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1574 goto exit;
1575
1576 newsk = tcp_create_openreq_child(sk, req, skb);
1577 if (!newsk)
1578 goto exit;
1579
1580 newsk->sk_dst_cache = dst;
1581 tcp_v4_setup_caps(newsk, dst);
1582
1583 newtp = tcp_sk(newsk);
1584 newinet = inet_sk(newsk);
1585 newinet->daddr = req->af.v4_req.rmt_addr;
1586 newinet->rcv_saddr = req->af.v4_req.loc_addr;
1587 newinet->saddr = req->af.v4_req.loc_addr;
1588 newinet->opt = req->af.v4_req.opt;
1589 req->af.v4_req.opt = NULL;
1590 newinet->mc_index = tcp_v4_iif(skb);
1591 newinet->mc_ttl = skb->nh.iph->ttl;
1592 newtp->ext_header_len = 0;
1593 if (newinet->opt)
1594 newtp->ext_header_len = newinet->opt->optlen;
1595 newtp->ext2_header_len = dst->header_len;
1596 newinet->id = newtp->write_seq ^ jiffies;
1597
1598 tcp_sync_mss(newsk, dst_pmtu(dst));
1599 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);;
1600 tcp_initialize_rcv_mss(newsk);
1601
1602 __tcp_v4_hash(newsk, 0);
1603 __tcp_inherit_port(sk, newsk);
1604
1605 return newsk;
1606
1607exit_overflow:
1608 NET_INC_STATS_BH(ListenOverflows);
1609exit:
1610 NET_INC_STATS_BH(ListenDrops);
1611 dst_release(dst);
1612 return NULL;
1613}
1614
1615static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1616{
1617 struct tcphdr *th = skb->h.th;
1618 struct iphdr *iph = skb->nh.iph;
1619 struct tcp_opt *tp = tcp_sk(sk);
1620 struct sock *nsk;
1621 struct open_request **prev;
1622
1623 struct open_request *req = tcp_v4_search_req(tp, &prev, th->source,
1624 iph->saddr, iph->daddr);
1625 if (req)
1626 return tcp_check_req(sk, skb, req, prev);
1627
1628 nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
1629 th->source,
1630 skb->nh.iph->daddr,
1631 ntohs(th->dest),
1632 tcp_v4_iif(skb));
1633
1634 if (nsk) {
1635 if (nsk->sk_state != TCP_TIME_WAIT) {
1636 bh_lock_sock(nsk);
1637 return nsk;
1638 }
1639 tcp_tw_put((struct tcp_tw_bucket *)nsk);
1640 return NULL;
1641 }
1642
1643#ifdef CONFIG_SYN_COOKIES
1644 if (!th->rst && !th->syn && th->ack)
1645 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1646#endif
1647 return sk;
1648}
1649
1650static int tcp_v4_checksum_init(struct sk_buff *skb)
1651{
1652 if (skb->ip_summed == CHECKSUM_HW) {
1653 skb->ip_summed = CHECKSUM_UNNECESSARY;
1654 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1655 skb->nh.iph->daddr, skb->csum))
1656 return 0;
1657
1658 NETDEBUG(if (net_ratelimit())
1659 printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
1660 skb->ip_summed = CHECKSUM_NONE;
1661 }
1662 if (skb->len <= 76) {
1663 if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1664 skb->nh.iph->daddr,
1665 skb_checksum(skb, 0, skb->len, 0)))
1666 return -1;
1667 skb->ip_summed = CHECKSUM_UNNECESSARY;
1668 } else {
1669 skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
1670 skb->nh.iph->saddr,
1671 skb->nh.iph->daddr, 0);
1672 }
1673 return 0;
1674}
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1686{
1687 if (sk->sk_state == TCP_ESTABLISHED) {
1688 TCP_CHECK_TIMER(sk);
1689 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1690 goto reset;
1691 TCP_CHECK_TIMER(sk);
1692 return 0;
1693 }
1694
1695 if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1696 goto csum_err;
1697
1698 if (sk->sk_state == TCP_LISTEN) {
1699 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1700 if (!nsk)
1701 goto discard;
1702
1703 if (nsk != sk) {
1704 if (tcp_child_process(sk, nsk, skb))
1705 goto reset;
1706 return 0;
1707 }
1708 }
1709
1710 TCP_CHECK_TIMER(sk);
1711 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1712 goto reset;
1713 TCP_CHECK_TIMER(sk);
1714 return 0;
1715
1716reset:
1717 tcp_v4_send_reset(skb);
1718discard:
1719 kfree_skb(skb);
1720
1721
1722
1723
1724
1725 return 0;
1726
1727csum_err:
1728 TCP_INC_STATS_BH(TcpInErrs);
1729 goto discard;
1730}
1731
1732
1733
1734
1735
1736int tcp_v4_rcv(struct sk_buff *skb)
1737{
1738 struct tcphdr *th;
1739 struct sock *sk;
1740 int ret;
1741
1742 if (skb->pkt_type != PACKET_HOST)
1743 goto discard_it;
1744
1745
1746 TCP_INC_STATS_BH(TcpInSegs);
1747
1748 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1749 goto discard_it;
1750
1751 th = skb->h.th;
1752
1753 if (th->doff < sizeof(struct tcphdr) / 4)
1754 goto bad_packet;
1755 if (!pskb_may_pull(skb, th->doff * 4))
1756 goto discard_it;
1757
1758
1759
1760
1761
1762 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1763 tcp_v4_checksum_init(skb) < 0))
1764 goto bad_packet;
1765
1766 th = skb->h.th;
1767 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1768 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1769 skb->len - th->doff * 4);
1770 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1771 TCP_SKB_CB(skb)->when = 0;
1772 TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
1773 TCP_SKB_CB(skb)->sacked = 0;
1774
1775 sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
1776 skb->nh.iph->daddr, ntohs(th->dest),
1777 tcp_v4_iif(skb));
1778
1779 if (!sk)
1780 goto no_tcp_socket;
1781
1782process:
1783 if (sk->sk_state == TCP_TIME_WAIT)
1784 goto do_time_wait;
1785
1786 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1787 goto discard_and_relse;
1788
1789 if (sk_filter(sk, skb, 0))
1790 goto discard_and_relse;
1791
1792 skb->dev = NULL;
1793
1794 bh_lock_sock(sk);
1795 ret = 0;
1796 if (!sock_owned_by_user(sk)) {
1797 if (!tcp_prequeue(sk, skb))
1798 ret = tcp_v4_do_rcv(sk, skb);
1799 } else
1800 sk_add_backlog(sk, skb);
1801 bh_unlock_sock(sk);
1802
1803 sock_put(sk);
1804
1805 return ret;
1806
1807no_tcp_socket:
1808 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1809 goto discard_it;
1810
1811 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1812bad_packet:
1813 TCP_INC_STATS_BH(TcpInErrs);
1814 } else {
1815 tcp_v4_send_reset(skb);
1816 }
1817
1818discard_it:
1819
1820 kfree_skb(skb);
1821 return 0;
1822
1823discard_and_relse:
1824 sock_put(sk);
1825 goto discard_it;
1826
1827do_time_wait:
1828 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1829 goto discard_and_relse;
1830
1831 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1832 TCP_INC_STATS_BH(TcpInErrs);
1833 goto discard_and_relse;
1834 }
1835 switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1836 skb, th, skb->len)) {
1837 case TCP_TW_SYN: {
1838 struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
1839 ntohs(th->dest),
1840 tcp_v4_iif(skb));
1841 if (sk2) {
1842 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1843 tcp_tw_put((struct tcp_tw_bucket *)sk);
1844 sk = sk2;
1845 goto process;
1846 }
1847
1848 }
1849 case TCP_TW_ACK:
1850 tcp_v4_timewait_ack(sk, skb);
1851 break;
1852 case TCP_TW_RST:
1853 goto no_tcp_socket;
1854 case TCP_TW_SUCCESS:;
1855 }
1856 goto discard_it;
1857}
1858
1859
1860
1861
/*
 * Re-insert @sk into the protocol hash tables by calling the protocol's
 * unhash and then hash operation.  Used by tcp_v4_reselect_saddr() after
 * the socket's source address has changed.
 */
static void __tcp_v4_rehash(struct sock *sk)
{
	sk->sk_prot->unhash(sk);
	sk->sk_prot->hash(sk);
}
1867
1868static int tcp_v4_reselect_saddr(struct sock *sk)
1869{
1870 struct inet_opt *inet = inet_sk(sk);
1871 int err;
1872 struct rtable *rt;
1873 __u32 old_saddr = inet->saddr;
1874 __u32 new_saddr;
1875 __u32 daddr = inet->daddr;
1876
1877 if (inet->opt && inet->opt->srr)
1878 daddr = inet->opt->faddr;
1879
1880
1881 err = ip_route_connect(&rt, daddr, 0,
1882 RT_TOS(inet->tos) | sk->sk_localroute,
1883 sk->sk_bound_dev_if,
1884 IPPROTO_TCP,
1885 inet->sport, inet->dport, sk);
1886 if (err)
1887 return err;
1888
1889 __sk_dst_set(sk, &rt->u.dst);
1890 tcp_v4_setup_caps(sk, &rt->u.dst);
1891 tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1892
1893 new_saddr = rt->rt_src;
1894
1895 if (new_saddr == old_saddr)
1896 return 0;
1897
1898 if (sysctl_ip_dynaddr > 1) {
1899 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
1900 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1901 NIPQUAD(old_saddr),
1902 NIPQUAD(new_saddr));
1903 }
1904
1905 inet->saddr = new_saddr;
1906 inet->rcv_saddr = new_saddr;
1907
1908
1909
1910
1911
1912
1913
1914
1915 __tcp_v4_rehash(sk);
1916 return 0;
1917}
1918
1919int tcp_v4_rebuild_header(struct sock *sk)
1920{
1921 struct inet_opt *inet = inet_sk(sk);
1922 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1923 u32 daddr;
1924 int err;
1925
1926
1927 if (rt)
1928 return 0;
1929
1930
1931 daddr = inet->daddr;
1932 if (inet->opt && inet->opt->srr)
1933 daddr = inet->opt->faddr;
1934
1935 {
1936 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1937 .nl_u = { .ip4_u =
1938 { .daddr = daddr,
1939 .saddr = inet->saddr,
1940 .tos = RT_CONN_FLAGS(sk) } },
1941 .proto = IPPROTO_TCP,
1942 .uli_u = { .ports =
1943 { .sport = inet->sport,
1944 .dport = inet->dport } } };
1945
1946 err = ip_route_output_flow(&rt, &fl, sk, 0);
1947 }
1948 if (!err) {
1949 __sk_dst_set(sk, &rt->u.dst);
1950 tcp_v4_setup_caps(sk, &rt->u.dst);
1951 tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1952 return 0;
1953 }
1954
1955
1956 sk->sk_route_caps = 0;
1957
1958 if (!sysctl_ip_dynaddr ||
1959 sk->sk_state != TCP_SYN_SENT ||
1960 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1961 (err = tcp_v4_reselect_saddr(sk)) != 0)
1962 sk->sk_err_soft = -err;
1963
1964 return err;
1965}
1966
/*
 * Fill @uaddr, viewed as a struct sockaddr_in, with the socket's remote
 * endpoint: family AF_INET, address inet->daddr and port inet->dport
 * (both copied as stored, without byte-order conversion).
 */
static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
	struct inet_opt *inet = inet_sk(sk);

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = inet->daddr;
	sin->sin_port = inet->dport;
}
1976
1977
1978
1979
1980
1981
1982
1983int tcp_v4_remember_stamp(struct sock *sk)
1984{
1985 struct inet_opt *inet = inet_sk(sk);
1986 struct tcp_opt *tp = tcp_sk(sk);
1987 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1988 struct inet_peer *peer = NULL;
1989 int release_it = 0;
1990
1991 if (!rt || rt->rt_dst != inet->daddr) {
1992 peer = inet_getpeer(inet->daddr, 1);
1993 release_it = 1;
1994 } else {
1995 if (!rt->peer)
1996 rt_bind_peer(rt, 1);
1997 peer = rt->peer;
1998 }
1999
2000 if (peer) {
2001 if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
2002 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2003 peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
2004 peer->tcp_ts_stamp = tp->ts_recent_stamp;
2005 peer->tcp_ts = tp->ts_recent;
2006 }
2007 if (release_it)
2008 inet_putpeer(peer);
2009 return 1;
2010 }
2011
2012 return 0;
2013}
2014
2015int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2016{
2017 struct inet_peer *peer = NULL;
2018
2019 peer = inet_getpeer(tw->tw_daddr, 1);
2020
2021 if (peer) {
2022 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
2023 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2024 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
2025 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
2026 peer->tcp_ts = tw->tw_ts_recent;
2027 }
2028 inet_putpeer(peer);
2029 return 1;
2030 }
2031
2032 return 0;
2033}
2034
/*
 * Address-family specific operations for TCP over IPv4.
 * tcp_v4_init_sock() installs this table as tp->af_specific.
 */
struct tcp_func ipv4_specific = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v4_conn_request,
	.syn_recv_sock	=	tcp_v4_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),	/* fixed IPv4 header size */
	.setsockopt	=	ip_setsockopt,
	.getsockopt	=	ip_getsockopt,
	.addr2sockaddr	=	v4_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in),
};
2048
2049
2050
2051
2052static int tcp_v4_init_sock(struct sock *sk)
2053{
2054 struct tcp_opt *tp = tcp_sk(sk);
2055
2056 skb_queue_head_init(&tp->out_of_order_queue);
2057 tcp_init_xmit_timers(sk);
2058 tcp_prequeue_init(tp);
2059
2060 tp->rto = TCP_TIMEOUT_INIT;
2061 tp->mdev = TCP_TIMEOUT_INIT;
2062
2063
2064
2065
2066
2067
2068 tp->snd_cwnd = 2;
2069
2070
2071
2072
2073 tp->snd_ssthresh = 0x7fffffff;
2074 tp->snd_cwnd_clamp = ~0;
2075 tp->mss_cache = 536;
2076
2077 tp->reordering = sysctl_tcp_reordering;
2078
2079 sk->sk_state = TCP_CLOSE;
2080
2081 sk->sk_write_space = tcp_write_space;
2082 sk->sk_use_write_queue = 1;
2083
2084 tp->af_specific = &ipv4_specific;
2085
2086 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2087 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2088
2089 atomic_inc(&tcp_sockets_allocated);
2090
2091 return 0;
2092}
2093
2094static int tcp_v4_destroy_sock(struct sock *sk)
2095{
2096 struct tcp_opt *tp = tcp_sk(sk);
2097
2098 tcp_clear_xmit_timers(sk);
2099
2100
2101 tcp_writequeue_purge(sk);
2102
2103
2104 __skb_queue_purge(&tp->out_of_order_queue);
2105
2106
2107 __skb_queue_purge(&tp->ucopy.prequeue);
2108
2109
2110 if (tp->bind_hash)
2111 tcp_put_port(sk);
2112
2113
2114 if (inet_sk(sk)->sndmsg_page)
2115 __free_page(inet_sk(sk)->sndmsg_page);
2116
2117 atomic_dec(&tcp_sockets_allocated);
2118
2119 return 0;
2120}
2121
2122#ifdef CONFIG_PROC_FS
2123
2124
2125static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
2126{
2127 return hlist_empty(head) ? NULL :
2128 list_entry(head->first, struct tcp_tw_bucket, tw_node);
2129}
2130
2131static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2132{
2133 return tw->tw_node.next ?
2134 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2135}
2136
2137static void *listening_get_first(struct seq_file *seq)
2138{
2139 struct tcp_iter_state* st = seq->private;
2140 void *rc = NULL;
2141
2142 for (st->bucket = 0; st->bucket < TCP_LHTABLE_SIZE; ++st->bucket) {
2143 struct open_request *req;
2144 struct tcp_opt *tp;
2145 struct sock *sk = sk_head(&tcp_listening_hash[st->bucket]);
2146
2147 if (!sk)
2148 continue;
2149 if (sk->sk_family == st->family) {
2150 rc = sk;
2151 goto out;
2152 }
2153 tp = tcp_sk(sk);
2154 read_lock_bh(&tp->syn_wait_lock);
2155 if (tp->listen_opt && tp->listen_opt->qlen) {
2156 st->uid = sock_i_uid(sk);
2157 st->syn_wait_sk = sk;
2158 st->state = TCP_SEQ_STATE_OPENREQ;
2159 for (st->sbucket = 0; st->sbucket < TCP_SYNQ_HSIZE;
2160 ++st->sbucket) {
2161 for (req = tp->listen_opt->syn_table[st->sbucket];
2162 req; req = req->dl_next) {
2163 if (req->class->family != st->family)
2164 continue;
2165 rc = req;
2166 goto out;
2167 }
2168 }
2169 st->state = TCP_SEQ_STATE_LISTENING;
2170 }
2171 read_unlock_bh(&tp->syn_wait_lock);
2172 }
2173out:
2174 return rc;
2175}
2176
/*
 * Advance the listening part of the /proc walk past @cur.
 *
 * @cur is a struct open_request * when st->state is
 * TCP_SEQ_STATE_OPENREQ, otherwise a struct sock *.  Returns the next
 * socket or open request of the iterator's family, or NULL when the
 * listening hash is exhausted.  st->num is incremented on every call.
 *
 * Locking: while an open request is the current entry, the owning
 * listener's syn_wait_lock stays read-held across calls; it is dropped
 * here once that listener's SYN table has been fully walked.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_opt *tp;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state* st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct open_request *req = cur;

		/* Continue in the SYN table of the listener recorded earlier. */
		tp = tcp_sk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->class->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= TCP_SYNQ_HSIZE)
				break;
get_req:
			/* Also entered from below with tp locked and sbucket == 0. */
			req = tp->listen_opt->syn_table[st->sbucket];
		}
		/* SYN table done: move on to the socket after the listener. */
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&tp->syn_wait_lock);
	} else
		sk = sk_next(sk);
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		/* Other family: it may still hold open requests of ours. */
		tp = tcp_sk(sk);
		read_lock_bh(&tp->syn_wait_lock);
		if (tp->listen_opt && tp->listen_opt->qlen) {
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&tp->syn_wait_lock);
	}
	/* Chain exhausted: try the next listening-hash bucket. */
	if (++st->bucket < TCP_LHTABLE_SIZE) {
		sk = sk_head(&tcp_listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
2234
2235static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2236{
2237 void *rc = listening_get_first(seq);
2238
2239 while (rc && *pos) {
2240 rc = listening_get_next(seq, rc);
2241 --*pos;
2242 }
2243 return rc;
2244}
2245
/*
 * Find the first entry of the established/TIME_WAIT part of the /proc
 * walk.
 *
 * For each ehash bucket the socket chain is scanned first, then the
 * TIME_WAIT chain stored tcp_ehash_size slots further on.  Returns the
 * first socket or TIME_WAIT bucket of the iterator's family, or NULL.
 *
 * Locking: on a non-NULL return the read lock of tcp_ehash[st->bucket]
 * is still held; st->state tells which kind of entry was returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state* st = seq->private;
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct tcp_tw_bucket *tw;

		read_lock(&tcp_ehash[st->bucket].lock);
		sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
			if (sk->sk_family != st->family) {
				continue;
			}
			rc = sk;
			goto out;
		}
		/* No socket matched; look at this bucket's TIME_WAIT chain. */
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		tw_for_each(tw, node,
			    &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
			if (tw->tw_family != st->family) {
				continue;
			}
			rc = tw;
			goto out;
		}
		read_unlock(&tcp_ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
2279
/*
 * Advance the established/TIME_WAIT part of the /proc walk past @cur.
 *
 * @cur is a struct tcp_tw_bucket * when st->state is
 * TCP_SEQ_STATE_TIME_WAIT, otherwise a struct sock *.  Returns the next
 * entry of the iterator's family or NULL at the end of the hash table.
 * st->num is incremented on every call.
 *
 * Locking: entered with the read lock of tcp_ehash[st->bucket] held.
 * That lock is released when the bucket's TIME_WAIT chain is exhausted
 * and the next bucket's lock is taken, so a non-NULL return again leaves
 * the current bucket locked.  A NULL return leaves no bucket locked.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct tcp_tw_bucket *tw;
	struct hlist_node *node;
	struct tcp_iter_state* st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Skip TIME_WAIT buckets of other families. */
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/* This bucket is done: unlock it and step to the next one. */
		read_unlock(&tcp_ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (++st->bucket < tcp_ehash_size) {
			read_lock(&tcp_ehash[st->bucket].lock);
			sk = sk_head(&tcp_ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	/* Socket chain exhausted: continue with the TIME_WAIT chain. */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
2325
2326static void *established_get_idx(struct seq_file *seq, loff_t pos)
2327{
2328 void *rc = established_get_first(seq);
2329
2330 while (rc && pos) {
2331 rc = established_get_next(seq, rc);
2332 --pos;
2333 }
2334 return rc;
2335}
2336
2337static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2338{
2339 void *rc;
2340 struct tcp_iter_state* st = seq->private;
2341
2342 tcp_listen_lock();
2343 st->state = TCP_SEQ_STATE_LISTENING;
2344 rc = listening_get_idx(seq, &pos);
2345
2346 if (!rc) {
2347 tcp_listen_unlock();
2348 local_bh_disable();
2349 st->state = TCP_SEQ_STATE_ESTABLISHED;
2350 rc = established_get_idx(seq, pos);
2351 }
2352
2353 return rc;
2354}
2355
2356static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2357{
2358 struct tcp_iter_state* st = seq->private;
2359 st->state = TCP_SEQ_STATE_LISTENING;
2360 st->num = 0;
2361 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2362}
2363
2364static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2365{
2366 void *rc = NULL;
2367 struct tcp_iter_state* st;
2368
2369 if (v == SEQ_START_TOKEN) {
2370 rc = tcp_get_idx(seq, 0);
2371 goto out;
2372 }
2373 st = seq->private;
2374
2375 switch (st->state) {
2376 case TCP_SEQ_STATE_OPENREQ:
2377 case TCP_SEQ_STATE_LISTENING:
2378 rc = listening_get_next(seq, v);
2379 if (!rc) {
2380 tcp_listen_unlock();
2381 local_bh_disable();
2382 st->state = TCP_SEQ_STATE_ESTABLISHED;
2383 rc = established_get_first(seq);
2384 }
2385 break;
2386 case TCP_SEQ_STATE_ESTABLISHED:
2387 case TCP_SEQ_STATE_TIME_WAIT:
2388 rc = established_get_next(seq, v);
2389 break;
2390 }
2391out:
2392 ++*pos;
2393 return rc;
2394}
2395
2396static void tcp_seq_stop(struct seq_file *seq, void *v)
2397{
2398 struct tcp_iter_state* st = seq->private;
2399
2400 switch (st->state) {
2401 case TCP_SEQ_STATE_OPENREQ:
2402 if (v) {
2403 struct tcp_opt *tp = tcp_sk(st->syn_wait_sk);
2404 read_unlock_bh(&tp->syn_wait_lock);
2405 }
2406 case TCP_SEQ_STATE_LISTENING:
2407 if (v != SEQ_START_TOKEN)
2408 tcp_listen_unlock();
2409 break;
2410 case TCP_SEQ_STATE_TIME_WAIT:
2411 case TCP_SEQ_STATE_ESTABLISHED:
2412 if (v)
2413 read_unlock(&tcp_ehash[st->bucket].lock);
2414 local_bh_enable();
2415 break;
2416 }
2417}
2418
2419static int tcp_seq_open(struct inode *inode, struct file *file)
2420{
2421 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2422 struct seq_file *seq;
2423 struct tcp_iter_state *s;
2424 int rc;
2425
2426 if (unlikely(afinfo == NULL))
2427 return -EINVAL;
2428
2429 s = kmalloc(sizeof(*s), GFP_KERNEL);
2430 if (!s)
2431 return -ENOMEM;
2432 memset(s, 0, sizeof(*s));
2433 s->family = afinfo->family;
2434 s->seq_ops.start = tcp_seq_start;
2435 s->seq_ops.next = tcp_seq_next;
2436 s->seq_ops.show = afinfo->seq_show;
2437 s->seq_ops.stop = tcp_seq_stop;
2438
2439 rc = seq_open(file, &s->seq_ops);
2440 if (rc)
2441 goto out_kfree;
2442 seq = file->private_data;
2443 seq->private = s;
2444out:
2445 return rc;
2446out_kfree:
2447 kfree(s);
2448 goto out;
2449}
2450
2451int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2452{
2453 int rc = 0;
2454 struct proc_dir_entry *p;
2455
2456 if (!afinfo)
2457 return -EINVAL;
2458 afinfo->seq_fops->owner = afinfo->owner;
2459 afinfo->seq_fops->open = tcp_seq_open;
2460 afinfo->seq_fops->read = seq_read;
2461 afinfo->seq_fops->llseek = seq_lseek;
2462 afinfo->seq_fops->release = seq_release_private;
2463
2464 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2465 if (p)
2466 p->data = afinfo;
2467 else
2468 rc = -ENOMEM;
2469 return rc;
2470}
2471
2472void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2473{
2474 if (!afinfo)
2475 return;
2476 proc_net_remove(afinfo->name);
2477 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2478}
2479
2480static void get_openreq4(struct sock *sk, struct open_request *req,
2481 char *tmpbuf, int i, int uid)
2482{
2483 int ttd = req->expires - jiffies;
2484
2485 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2486 " %02X %08X:%08X %02X:%08X %08X %5d %8d %u %d %p",
2487 i,
2488 req->af.v4_req.loc_addr,
2489 ntohs(inet_sk(sk)->sport),
2490 req->af.v4_req.rmt_addr,
2491 ntohs(req->rmt_port),
2492 TCP_SYN_RECV,
2493 0, 0,
2494 1,
2495 jiffies_to_clock_t(ttd),
2496 req->retrans,
2497 uid,
2498 0,
2499 0,
2500 atomic_read(&sk->sk_refcnt),
2501 req);
2502}
2503
2504static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2505{
2506 int timer_active;
2507 unsigned long timer_expires;
2508 struct tcp_opt *tp = tcp_sk(sp);
2509 struct inet_opt *inet = inet_sk(sp);
2510 unsigned int dest = inet->daddr;
2511 unsigned int src = inet->rcv_saddr;
2512 __u16 destp = ntohs(inet->dport);
2513 __u16 srcp = ntohs(inet->sport);
2514
2515 if (tp->pending == TCP_TIME_RETRANS) {
2516 timer_active = 1;
2517 timer_expires = tp->timeout;
2518 } else if (tp->pending == TCP_TIME_PROBE0) {
2519 timer_active = 4;
2520 timer_expires = tp->timeout;
2521 } else if (timer_pending(&sp->sk_timer)) {
2522 timer_active = 2;
2523 timer_expires = sp->sk_timer.expires;
2524 } else {
2525 timer_active = 0;
2526 timer_expires = jiffies;
2527 }
2528
2529 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2530 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2531 i, src, srcp, dest, destp, sp->sk_state,
2532 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2533 timer_active,
2534 jiffies_to_clock_t(timer_expires - jiffies),
2535 tp->retransmits,
2536 sock_i_uid(sp),
2537 tp->probes_out,
2538 sock_i_ino(sp),
2539 atomic_read(&sp->sk_refcnt), sp,
2540 tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
2541 tp->snd_cwnd,
2542 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2543}
2544
2545static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
2546{
2547 unsigned int dest, src;
2548 __u16 destp, srcp;
2549 int ttd = tw->tw_ttd - jiffies;
2550
2551 if (ttd < 0)
2552 ttd = 0;
2553
2554 dest = tw->tw_daddr;
2555 src = tw->tw_rcv_saddr;
2556 destp = ntohs(tw->tw_dport);
2557 srcp = ntohs(tw->tw_sport);
2558
2559 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2560 " %02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
2561 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2562 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2563 atomic_read(&tw->tw_refcnt), tw);
2564}
2565
2566#define TMPSZ 150
2567
2568static int tcp4_seq_show(struct seq_file *seq, void *v)
2569{
2570 struct tcp_iter_state* st;
2571 char tmpbuf[TMPSZ + 1];
2572
2573 if (v == SEQ_START_TOKEN) {
2574 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2575 " sl local_address rem_address st tx_queue "
2576 "rx_queue tr tm->when retrnsmt uid timeout "
2577 "inode");
2578 goto out;
2579 }
2580 st = seq->private;
2581
2582 switch (st->state) {
2583 case TCP_SEQ_STATE_LISTENING:
2584 case TCP_SEQ_STATE_ESTABLISHED:
2585 get_tcp4_sock(v, tmpbuf, st->num);
2586 break;
2587 case TCP_SEQ_STATE_OPENREQ:
2588 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2589 break;
2590 case TCP_SEQ_STATE_TIME_WAIT:
2591 get_timewait4_sock(v, tmpbuf, st->num);
2592 break;
2593 }
2594 seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2595out:
2596 return 0;
2597}
2598
/* Filled in at registration time by tcp_proc_register(). */
static struct file_operations tcp4_seq_fops;

/* Description of the IPv4 "tcp" /proc/net file. */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};
2607
/*
 * Register the IPv4 "tcp" /proc/net file.
 * Returns the result of tcp_proc_register().
 */
int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}
2612
/* Remove the IPv4 "tcp" /proc/net file again. */
void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
2617#endif
2618
/*
 * Protocol operations table for TCP.  The init, destroy and backlog_rcv
 * hooks point at functions defined in this file.
 */
struct proto tcp_prot = {
	.name		=	"TCP",
	.close		=	tcp_close,
	.connect	=	tcp_v4_connect,
	.disconnect	=	tcp_disconnect,
	.accept		=	tcp_accept,
	.ioctl		=	tcp_ioctl,
	.init		=	tcp_v4_init_sock,
	.destroy	=	tcp_v4_destroy_sock,
	.shutdown	=	tcp_shutdown,
	.setsockopt	=	tcp_setsockopt,
	.getsockopt	=	tcp_getsockopt,
	.sendmsg	=	tcp_sendmsg,
	.recvmsg	=	tcp_recvmsg,
	.backlog_rcv	=	tcp_v4_do_rcv,	/* segments queued while the socket was user-owned */
	.hash		=	tcp_v4_hash,
	.unhash		=	tcp_unhash,
	.get_port	=	tcp_v4_get_port,
};
2638
2639
2640
2641void __init tcp_v4_init(struct net_proto_family *ops)
2642{
2643 int err = sock_create(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
2644 if (err < 0)
2645 panic("Failed to create the TCP control socket.\n");
2646 tcp_socket->sk->sk_allocation = GFP_ATOMIC;
2647 inet_sk(tcp_socket->sk)->uc_ttl = -1;
2648
2649
2650
2651
2652
2653 tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
2654}
2655
/* Symbols made available to other kernel code. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_bind_hash);
EXPORT_SYMBOL(tcp_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_inherit_port);
EXPORT_SYMBOL(tcp_listen_wlock);
EXPORT_SYMBOL(tcp_port_rover);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_put_port);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_lookup_listener);
EXPORT_SYMBOL(tcp_v4_rebuild_header);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/* /proc registration helpers exist only with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
/* Tunables are exported only with CONFIG_SYSCTL. */
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_max_syn_backlog);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
#endif
2684