1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55#include <linux/config.h>
56
57#include <linux/types.h>
58#include <linux/fcntl.h>
59#include <linux/module.h>
60#include <linux/random.h>
61#include <linux/cache.h>
62#include <linux/jhash.h>
63#include <linux/init.h>
64#include <linux/times.h>
65
66#include <net/icmp.h>
67#include <net/tcp.h>
68#include <net/ipv6.h>
69#include <net/inet_common.h>
70#include <net/xfrm.h>
71
72#include <linux/inet.h>
73#include <linux/ipv6.h>
74#include <linux/stddef.h>
75#include <linux/proc_fs.h>
76#include <linux/seq_file.h>
77
/* Defined in another translation unit; only declared here. */
extern int sysctl_ip_dynaddr;
/* Gates the reuse of a matching TIME-WAIT entry in __tcp_v4_check_established(). */
int sysctl_tcp_tw_reuse;
int sysctl_tcp_low_latency;


/*
 * NOTE(review): presumably the number of bytes of the offending transport
 * header that an ICMP error has to carry -- confirm against its users.
 */
#define ICMP_MIN_LENGTH 8


/* Its ->sk is the socket handed to ip_send_reply() for RST and bare ACK replies. */
static struct socket *tcp_socket;

/* Forward declaration; the definition appears further down in this file. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb);

/*
 * Statically initialised part of the TCP hash bookkeeping: the listening
 * hash lock, its user count and wait queue, and the port allocation lock.
 */
struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
	.__tcp_lhash_lock = RW_LOCK_UNLOCKED,
	.__tcp_lhash_users = ATOMIC_INIT(0),
	.__tcp_lhash_wait
	  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
	.__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED
};






/*
 * Inclusive [low, high] range that is searched when a socket needs an
 * automatically chosen local port.
 */
int sysctl_local_port_range[2] = { 1024, 4999 };
/* Last port tried by the automatic search; the next search starts at rover + 1. */
int tcp_port_rover = 1024 - 1;
106
107static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
108 __u32 faddr, __u16 fport)
109{
110 int h = (laddr ^ lport) ^ (faddr ^ fport);
111 h ^= h >> 16;
112 h ^= h >> 8;
113 return h & (tcp_ehash_size - 1);
114}
115
116static __inline__ int tcp_sk_hashfn(struct sock *sk)
117{
118 struct inet_opt *inet = inet_sk(sk);
119 __u32 laddr = inet->rcv_saddr;
120 __u16 lport = inet->num;
121 __u32 faddr = inet->daddr;
122 __u16 fport = inet->dport;
123
124 return tcp_hashfn(laddr, lport, faddr, fport);
125}
126
127
128
129
130struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
131 unsigned short snum)
132{
133 struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
134 SLAB_ATOMIC);
135 if (tb) {
136 tb->port = snum;
137 tb->fastreuse = 0;
138 INIT_HLIST_HEAD(&tb->owners);
139 hlist_add_head(&tb->node, &head->chain);
140 }
141 return tb;
142}
143
144
145void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
146{
147 if (hlist_empty(&tb->owners)) {
148 __hlist_del(&tb->node);
149 kmem_cache_free(tcp_bucket_cachep, tb);
150 }
151}
152
153
154static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
155{
156 struct tcp_bind_hashbucket *head =
157 &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
158 struct tcp_bind_bucket *tb;
159
160 spin_lock(&head->lock);
161 tb = tcp_sk(sk)->bind_hash;
162 sk_add_bind_node(child, &tb->owners);
163 tcp_sk(child)->bind_hash = tb;
164 spin_unlock(&head->lock);
165}
166
/*
 * Variant of __tcp_inherit_port() that disables bottom halves for the
 * duration of the operation.
 */
inline void tcp_inherit_port(struct sock *sk, struct sock *child)
{
	local_bh_disable();
	__tcp_inherit_port(sk, child);	/* takes the bind bucket lock itself */
	local_bh_enable();
}
173
174void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
175 unsigned short snum)
176{
177 inet_sk(sk)->num = snum;
178 sk_add_bind_node(sk, &tb->owners);
179 tcp_sk(sk)->bind_hash = tb;
180}
181
182static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
183{
184 const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
185 struct sock *sk2;
186 struct hlist_node *node;
187 int reuse = sk->sk_reuse;
188
189 sk_for_each_bound(sk2, node, &tb->owners) {
190 if (sk != sk2 &&
191 !tcp_v6_ipv6only(sk2) &&
192 (!sk->sk_bound_dev_if ||
193 !sk2->sk_bound_dev_if ||
194 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
195 if (!reuse || !sk2->sk_reuse ||
196 sk2->sk_state == TCP_LISTEN) {
197 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
198 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
199 sk2_rcv_saddr == sk_rcv_saddr)
200 break;
201 }
202 }
203 }
204 return node != NULL;
205}
206
207
208
209
210static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
211{
212 struct tcp_bind_hashbucket *head;
213 struct hlist_node *node;
214 struct tcp_bind_bucket *tb;
215 int ret;
216
217 local_bh_disable();
218 if (!snum) {
219 int low = sysctl_local_port_range[0];
220 int high = sysctl_local_port_range[1];
221 int remaining = (high - low) + 1;
222 int rover;
223
224 spin_lock(&tcp_portalloc_lock);
225 rover = tcp_port_rover;
226 do {
227 rover++;
228 if (rover < low || rover > high)
229 rover = low;
230 head = &tcp_bhash[tcp_bhashfn(rover)];
231 spin_lock(&head->lock);
232 tb_for_each(tb, node, &head->chain)
233 if (tb->port == rover)
234 goto next;
235 break;
236 next:
237 spin_unlock(&head->lock);
238 } while (--remaining > 0);
239 tcp_port_rover = rover;
240 spin_unlock(&tcp_portalloc_lock);
241
242
243 ret = 1;
244 if (remaining <= 0)
245 goto fail;
246
247
248
249
250 snum = rover;
251 } else {
252 head = &tcp_bhash[tcp_bhashfn(snum)];
253 spin_lock(&head->lock);
254 tb_for_each(tb, node, &head->chain)
255 if (tb->port == snum)
256 goto tb_found;
257 }
258 tb = NULL;
259 goto tb_not_found;
260tb_found:
261 if (!hlist_empty(&tb->owners)) {
262 if (sk->sk_reuse > 1)
263 goto success;
264 if (tb->fastreuse > 0 &&
265 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
266 goto success;
267 } else {
268 ret = 1;
269 if (tcp_bind_conflict(sk, tb))
270 goto fail_unlock;
271 }
272 }
273tb_not_found:
274 ret = 1;
275 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
276 goto fail_unlock;
277 if (hlist_empty(&tb->owners)) {
278 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
279 tb->fastreuse = 1;
280 else
281 tb->fastreuse = 0;
282 } else if (tb->fastreuse &&
283 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
284 tb->fastreuse = 0;
285success:
286 if (!tcp_sk(sk)->bind_hash)
287 tcp_bind_hash(sk, tb, snum);
288 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
289 ret = 0;
290
291fail_unlock:
292 spin_unlock(&head->lock);
293fail:
294 local_bh_enable();
295 return ret;
296}
297
298
299
300
301static void __tcp_put_port(struct sock *sk)
302{
303 struct inet_opt *inet = inet_sk(sk);
304 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
305 struct tcp_bind_bucket *tb;
306
307 spin_lock(&head->lock);
308 tb = tcp_sk(sk)->bind_hash;
309 __sk_del_bind_node(sk);
310 tcp_sk(sk)->bind_hash = NULL;
311 inet->num = 0;
312 tcp_bucket_destroy(tb);
313 spin_unlock(&head->lock);
314}
315
/* Release the local port of @sk with bottom halves disabled. */
void tcp_put_port(struct sock *sk)
{
	local_bh_disable();
	__tcp_put_port(sk);	/* takes the bind bucket lock itself */
	local_bh_enable();
}
322
323
324
325
326
327
328
329
330void tcp_listen_wlock(void)
331{
332 write_lock(&tcp_lhash_lock);
333
334 if (atomic_read(&tcp_lhash_users)) {
335 DEFINE_WAIT(wait);
336
337 for (;;) {
338 prepare_to_wait_exclusive(&tcp_lhash_wait,
339 &wait, TASK_UNINTERRUPTIBLE);
340 if (!atomic_read(&tcp_lhash_users))
341 break;
342 write_unlock_bh(&tcp_lhash_lock);
343 schedule();
344 write_lock_bh(&tcp_lhash_lock);
345 }
346
347 finish_wait(&tcp_lhash_wait, &wait);
348 }
349}
350
351static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
352{
353 struct hlist_head *list;
354 rwlock_t *lock;
355
356 BUG_TRAP(sk_unhashed(sk));
357 if (listen_possible && sk->sk_state == TCP_LISTEN) {
358 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
359 lock = &tcp_lhash_lock;
360 tcp_listen_wlock();
361 } else {
362 list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
363 lock = &tcp_ehash[sk->sk_hashent].lock;
364 write_lock(lock);
365 }
366 __sk_add_node(sk, list);
367 sock_prot_inc_use(sk->sk_prot);
368 write_unlock(lock);
369 if (listen_possible && sk->sk_state == TCP_LISTEN)
370 wake_up(&tcp_lhash_wait);
371}
372
373static void tcp_v4_hash(struct sock *sk)
374{
375 if (sk->sk_state != TCP_CLOSE) {
376 local_bh_disable();
377 __tcp_v4_hash(sk, 1);
378 local_bh_enable();
379 }
380}
381
382void tcp_unhash(struct sock *sk)
383{
384 rwlock_t *lock;
385
386 if (sk_unhashed(sk))
387 goto ende;
388
389 if (sk->sk_state == TCP_LISTEN) {
390 local_bh_disable();
391 tcp_listen_wlock();
392 lock = &tcp_lhash_lock;
393 } else {
394 struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
395 lock = &head->lock;
396 write_lock_bh(&head->lock);
397 }
398
399 if (__sk_del_node_init(sk))
400 sock_prot_dec_use(sk->sk_prot);
401 write_unlock_bh(lock);
402
403 ende:
404 if (sk->sk_state == TCP_LISTEN)
405 wake_up(&tcp_lhash_wait);
406}
407
408
409
410
411
412
413
414static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
415 unsigned short hnum, int dif)
416{
417 struct sock *result = NULL, *sk;
418 struct hlist_node *node;
419 int score, hiscore;
420
421 hiscore=-1;
422 sk_for_each(sk, node, head) {
423 struct inet_opt *inet = inet_sk(sk);
424
425 if (inet->num == hnum && !ipv6_only_sock(sk)) {
426 __u32 rcv_saddr = inet->rcv_saddr;
427
428 score = (sk->sk_family == PF_INET ? 1 : 0);
429 if (rcv_saddr) {
430 if (rcv_saddr != daddr)
431 continue;
432 score+=2;
433 }
434 if (sk->sk_bound_dev_if) {
435 if (sk->sk_bound_dev_if != dif)
436 continue;
437 score+=2;
438 }
439 if (score == 5)
440 return sk;
441 if (score > hiscore) {
442 hiscore = score;
443 result = sk;
444 }
445 }
446 }
447 return result;
448}
449
450
451inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum,
452 int dif)
453{
454 struct sock *sk = NULL;
455 struct hlist_head *head;
456
457 read_lock(&tcp_lhash_lock);
458 head = &tcp_listening_hash[tcp_lhashfn(hnum)];
459 if (!hlist_empty(head)) {
460 struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
461
462 if (inet->num == hnum && !sk->sk_node.next &&
463 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
464 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
465 !sk->sk_bound_dev_if)
466 goto sherry_cache;
467 sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
468 }
469 if (sk) {
470sherry_cache:
471 sock_hold(sk);
472 }
473 read_unlock(&tcp_lhash_lock);
474 return sk;
475}
476
477
478
479
480
481
482
483static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
484 u32 daddr, u16 hnum,
485 int dif)
486{
487 struct tcp_ehash_bucket *head;
488 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
489 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
490 struct sock *sk;
491 struct hlist_node *node;
492
493
494
495 int hash = tcp_hashfn(daddr, hnum, saddr, sport);
496 head = &tcp_ehash[hash];
497 read_lock(&head->lock);
498 sk_for_each(sk, node, &head->chain) {
499 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
500 goto hit;
501 }
502
503
504 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
505 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
506 goto hit;
507 }
508 sk = NULL;
509out:
510 read_unlock(&head->lock);
511 return sk;
512hit:
513 sock_hold(sk);
514 goto out;
515}
516
517static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
518 u32 daddr, u16 hnum, int dif)
519{
520 struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
521 daddr, hnum, dif);
522
523 return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
524}
525
526inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
527 u16 dport, int dif)
528{
529 struct sock *sk;
530
531 local_bh_disable();
532 sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
533 local_bh_enable();
534
535 return sk;
536}
537
538static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
539{
540 return secure_tcp_sequence_number(skb->nh.iph->daddr,
541 skb->nh.iph->saddr,
542 skb->h.th->dest,
543 skb->h.th->source);
544}
545
546
547static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
548 struct tcp_tw_bucket **twp)
549{
550 struct inet_opt *inet = inet_sk(sk);
551 u32 daddr = inet->rcv_saddr;
552 u32 saddr = inet->daddr;
553 int dif = sk->sk_bound_dev_if;
554 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
555 __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
556 int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
557 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
558 struct sock *sk2;
559 struct hlist_node *node;
560 struct tcp_tw_bucket *tw;
561
562 write_lock(&head->lock);
563
564
565 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
566 tw = (struct tcp_tw_bucket *)sk2;
567
568 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
569 struct tcp_opt *tp = tcp_sk(sk);
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585 if (tw->tw_ts_recent_stamp &&
586 (!twp || (sysctl_tcp_tw_reuse &&
587 xtime.tv_sec -
588 tw->tw_ts_recent_stamp > 1))) {
589 if ((tp->write_seq =
590 tw->tw_snd_nxt + 65535 + 2) == 0)
591 tp->write_seq = 1;
592 tp->ts_recent = tw->tw_ts_recent;
593 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
594 sock_hold(sk2);
595 goto unique;
596 } else
597 goto not_unique;
598 }
599 }
600 tw = NULL;
601
602
603 sk_for_each(sk2, node, &head->chain) {
604 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
605 goto not_unique;
606 }
607
608unique:
609
610
611 inet->num = lport;
612 inet->sport = htons(lport);
613 sk->sk_hashent = hash;
614 BUG_TRAP(sk_unhashed(sk));
615 __sk_add_node(sk, &head->chain);
616 sock_prot_inc_use(sk->sk_prot);
617 write_unlock(&head->lock);
618
619 if (twp) {
620 *twp = tw;
621 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
622 } else if (tw) {
623
624 tcp_tw_deschedule(tw);
625 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
626
627 tcp_tw_put(tw);
628 }
629
630 return 0;
631
632not_unique:
633 write_unlock(&head->lock);
634 return -EADDRNOTAVAIL;
635}
636
637
638
639
640static int tcp_v4_hash_connect(struct sock *sk)
641{
642 unsigned short snum = inet_sk(sk)->num;
643 struct tcp_bind_hashbucket *head;
644 struct tcp_bind_bucket *tb;
645 int ret;
646
647 if (!snum) {
648 int rover;
649 int low = sysctl_local_port_range[0];
650 int high = sysctl_local_port_range[1];
651 int remaining = (high - low) + 1;
652 struct hlist_node *node;
653 struct tcp_tw_bucket *tw = NULL;
654
655 local_bh_disable();
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670 spin_lock(&tcp_portalloc_lock);
671 rover = tcp_port_rover;
672
673 do {
674 rover++;
675 if ((rover < low) || (rover > high))
676 rover = low;
677 head = &tcp_bhash[tcp_bhashfn(rover)];
678 spin_lock(&head->lock);
679
680
681
682
683
684 tb_for_each(tb, node, &head->chain) {
685 if (tb->port == rover) {
686 BUG_TRAP(!hlist_empty(&tb->owners));
687 if (tb->fastreuse >= 0)
688 goto next_port;
689 if (!__tcp_v4_check_established(sk,
690 rover,
691 &tw))
692 goto ok;
693 goto next_port;
694 }
695 }
696
697 tb = tcp_bucket_create(head, rover);
698 if (!tb) {
699 spin_unlock(&head->lock);
700 break;
701 }
702 tb->fastreuse = -1;
703 goto ok;
704
705 next_port:
706 spin_unlock(&head->lock);
707 } while (--remaining > 0);
708 tcp_port_rover = rover;
709 spin_unlock(&tcp_portalloc_lock);
710
711 local_bh_enable();
712
713 return -EADDRNOTAVAIL;
714
715ok:
716
717 tcp_port_rover = rover;
718 spin_unlock(&tcp_portalloc_lock);
719
720 tcp_bind_hash(sk, tb, rover);
721 if (sk_unhashed(sk)) {
722 inet_sk(sk)->sport = htons(rover);
723 __tcp_v4_hash(sk, 0);
724 }
725 spin_unlock(&head->lock);
726
727 if (tw) {
728 tcp_tw_deschedule(tw);
729 tcp_tw_put(tw);
730 }
731
732 ret = 0;
733 goto out;
734 }
735
736 head = &tcp_bhash[tcp_bhashfn(snum)];
737 tb = tcp_sk(sk)->bind_hash;
738 spin_lock_bh(&head->lock);
739 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
740 __tcp_v4_hash(sk, 0);
741 spin_unlock_bh(&head->lock);
742 return 0;
743 } else {
744 spin_unlock(&head->lock);
745
746 ret = __tcp_v4_check_established(sk, snum, NULL);
747out:
748 local_bh_enable();
749 return ret;
750 }
751}
752
753
754int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
755{
756 struct inet_opt *inet = inet_sk(sk);
757 struct tcp_opt *tp = tcp_sk(sk);
758 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
759 struct rtable *rt;
760 u32 daddr, nexthop;
761 int tmp;
762 int err;
763
764 if (addr_len < sizeof(struct sockaddr_in))
765 return -EINVAL;
766
767 if (usin->sin_family != AF_INET)
768 return -EAFNOSUPPORT;
769
770 nexthop = daddr = usin->sin_addr.s_addr;
771 if (inet->opt && inet->opt->srr) {
772 if (!daddr)
773 return -EINVAL;
774 nexthop = inet->opt->faddr;
775 }
776
777 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
778 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
779 IPPROTO_TCP,
780 inet->sport, usin->sin_port, sk);
781 if (tmp < 0)
782 return tmp;
783
784 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
785 ip_rt_put(rt);
786 return -ENETUNREACH;
787 }
788
789 if (!inet->opt || !inet->opt->srr)
790 daddr = rt->rt_dst;
791
792 if (!inet->saddr)
793 inet->saddr = rt->rt_src;
794 inet->rcv_saddr = inet->saddr;
795
796 if (tp->ts_recent_stamp && inet->daddr != daddr) {
797
798 tp->ts_recent = 0;
799 tp->ts_recent_stamp = 0;
800 tp->write_seq = 0;
801 }
802
803 if (sysctl_tcp_tw_recycle &&
804 !tp->ts_recent_stamp && rt->rt_dst == daddr) {
805 struct inet_peer *peer = rt_get_peer(rt);
806
807
808
809
810
811
812 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
813 tp->ts_recent_stamp = peer->tcp_ts_stamp;
814 tp->ts_recent = peer->tcp_ts;
815 }
816 }
817
818 inet->dport = usin->sin_port;
819 inet->daddr = daddr;
820
821 tp->ext_header_len = 0;
822 if (inet->opt)
823 tp->ext_header_len = inet->opt->optlen;
824
825 tp->mss_clamp = 536;
826
827
828
829
830
831
832 tcp_set_state(sk, TCP_SYN_SENT);
833 err = tcp_v4_hash_connect(sk);
834 if (err)
835 goto failure;
836
837 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
838 if (err)
839 goto failure;
840
841
842 __sk_dst_set(sk, &rt->u.dst);
843 tcp_v4_setup_caps(sk, &rt->u.dst);
844 tp->ext2_header_len = rt->u.dst.header_len;
845
846 if (!tp->write_seq)
847 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
848 inet->daddr,
849 inet->sport,
850 usin->sin_port);
851
852 inet->id = tp->write_seq ^ jiffies;
853
854 err = tcp_connect(sk);
855 rt = NULL;
856 if (err)
857 goto failure;
858
859 return 0;
860
861failure:
862
863 tcp_set_state(sk, TCP_CLOSE);
864 ip_rt_put(rt);
865 sk->sk_route_caps = 0;
866 inet->dport = 0;
867 return err;
868}
869
870static __inline__ int tcp_v4_iif(struct sk_buff *skb)
871{
872 return ((struct rtable *)skb->dst)->rt_iif;
873}
874
875static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
876{
877 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
878}
879
880static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
881 struct open_request ***prevp,
882 __u16 rport,
883 __u32 raddr, __u32 laddr)
884{
885 struct tcp_listen_opt *lopt = tp->listen_opt;
886 struct open_request *req, **prev;
887
888 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
889 (req = *prev) != NULL;
890 prev = &req->dl_next) {
891 if (req->rmt_port == rport &&
892 req->af.v4_req.rmt_addr == raddr &&
893 req->af.v4_req.loc_addr == laddr &&
894 TCP_INET_FAMILY(req->class->family)) {
895 BUG_TRAP(!req->sk);
896 *prevp = prev;
897 break;
898 }
899 }
900
901 return req;
902}
903
904static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
905{
906 struct tcp_opt *tp = tcp_sk(sk);
907 struct tcp_listen_opt *lopt = tp->listen_opt;
908 u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
909
910 req->expires = jiffies + TCP_TIMEOUT_INIT;
911 req->retrans = 0;
912 req->sk = NULL;
913 req->dl_next = lopt->syn_table[h];
914
915 write_lock(&tp->syn_wait_lock);
916 lopt->syn_table[h] = req;
917 write_unlock(&tp->syn_wait_lock);
918
919 tcp_synq_added(sk);
920}
921
922
923
924
925
926static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
927 u32 mtu)
928{
929 struct dst_entry *dst;
930 struct inet_opt *inet = inet_sk(sk);
931 struct tcp_opt *tp = tcp_sk(sk);
932
933
934
935
936
937 if (sk->sk_state == TCP_LISTEN)
938 return;
939
940
941
942
943
944
945
946 if ((dst = __sk_dst_check(sk, 0)) == NULL)
947 return;
948
949 dst->ops->update_pmtu(dst, mtu);
950
951
952
953
954 if (mtu < dst_pmtu(dst) && ip_dont_fragment(sk, dst))
955 sk->sk_err_soft = EMSGSIZE;
956
957 mtu = dst_pmtu(dst);
958
959 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
960 tp->pmtu_cookie > mtu) {
961 tcp_sync_mss(sk, mtu);
962
963
964
965
966
967
968 tcp_simple_retransmit(sk);
969 }
970}
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988void tcp_v4_err(struct sk_buff *skb, u32 info)
989{
990 struct iphdr *iph = (struct iphdr *)skb->data;
991 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
992 struct tcp_opt *tp;
993 struct inet_opt *inet;
994 int type = skb->h.icmph->type;
995 int code = skb->h.icmph->code;
996 struct sock *sk;
997 __u32 seq;
998 int err;
999
1000 if (skb->len < (iph->ihl << 2) + 8) {
1001 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
1002 return;
1003 }
1004
1005 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
1006 th->source, tcp_v4_iif(skb));
1007 if (!sk) {
1008 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
1009 return;
1010 }
1011 if (sk->sk_state == TCP_TIME_WAIT) {
1012 tcp_tw_put((struct tcp_tw_bucket *)sk);
1013 return;
1014 }
1015
1016 bh_lock_sock(sk);
1017
1018
1019
1020 if (sock_owned_by_user(sk))
1021 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
1022
1023 if (sk->sk_state == TCP_CLOSE)
1024 goto out;
1025
1026 tp = tcp_sk(sk);
1027 seq = ntohl(th->seq);
1028 if (sk->sk_state != TCP_LISTEN &&
1029 !between(seq, tp->snd_una, tp->snd_nxt)) {
1030 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
1031 goto out;
1032 }
1033
1034 switch (type) {
1035 case ICMP_SOURCE_QUENCH:
1036
1037
1038
1039 if (!sock_owned_by_user(sk))
1040 tcp_enter_cwr(tp);
1041 goto out;
1042 case ICMP_PARAMETERPROB:
1043 err = EPROTO;
1044 break;
1045 case ICMP_DEST_UNREACH:
1046 if (code > NR_ICMP_UNREACH)
1047 goto out;
1048
1049 if (code == ICMP_FRAG_NEEDED) {
1050 if (!sock_owned_by_user(sk))
1051 do_pmtu_discovery(sk, iph, info);
1052 goto out;
1053 }
1054
1055 err = icmp_err_convert[code].errno;
1056 break;
1057 case ICMP_TIME_EXCEEDED:
1058 err = EHOSTUNREACH;
1059 break;
1060 default:
1061 goto out;
1062 }
1063
1064 switch (sk->sk_state) {
1065 struct open_request *req, **prev;
1066 case TCP_LISTEN:
1067 if (sock_owned_by_user(sk))
1068 goto out;
1069
1070 req = tcp_v4_search_req(tp, &prev, th->dest,
1071 iph->daddr, iph->saddr);
1072 if (!req)
1073 goto out;
1074
1075
1076
1077
1078 BUG_TRAP(!req->sk);
1079
1080 if (seq != req->snt_isn) {
1081 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1082 goto out;
1083 }
1084
1085
1086
1087
1088
1089
1090
1091 tcp_synq_drop(sk, req, prev);
1092 goto out;
1093
1094 case TCP_SYN_SENT:
1095 case TCP_SYN_RECV:
1096
1097
1098 if (!sock_owned_by_user(sk)) {
1099 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1100 sk->sk_err = err;
1101
1102 sk->sk_error_report(sk);
1103
1104 tcp_done(sk);
1105 } else {
1106 sk->sk_err_soft = err;
1107 }
1108 goto out;
1109 }
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127 inet = inet_sk(sk);
1128 if (!sock_owned_by_user(sk) && inet->recverr) {
1129 sk->sk_err = err;
1130 sk->sk_error_report(sk);
1131 } else {
1132 sk->sk_err_soft = err;
1133 }
1134
1135out:
1136 bh_unlock_sock(sk);
1137 sock_put(sk);
1138}
1139
1140
1141void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
1142 struct sk_buff *skb)
1143{
1144 struct inet_opt *inet = inet_sk(sk);
1145
1146 if (skb->ip_summed == CHECKSUM_HW) {
1147 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
1148 skb->csum = offsetof(struct tcphdr, check);
1149 } else {
1150 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
1151 csum_partial((char *)th,
1152 th->doff << 2,
1153 skb->csum));
1154 }
1155}
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170static void tcp_v4_send_reset(struct sk_buff *skb)
1171{
1172 struct tcphdr *th = skb->h.th;
1173 struct tcphdr rth;
1174 struct ip_reply_arg arg;
1175
1176
1177 if (th->rst)
1178 return;
1179
1180 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
1181 return;
1182
1183
1184 memset(&rth, 0, sizeof(struct tcphdr));
1185 rth.dest = th->source;
1186 rth.source = th->dest;
1187 rth.doff = sizeof(struct tcphdr) / 4;
1188 rth.rst = 1;
1189
1190 if (th->ack) {
1191 rth.seq = th->ack_seq;
1192 } else {
1193 rth.ack = 1;
1194 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
1195 skb->len - (th->doff << 2));
1196 }
1197
1198 memset(&arg, 0, sizeof arg);
1199 arg.iov[0].iov_base = (unsigned char *)&rth;
1200 arg.iov[0].iov_len = sizeof rth;
1201 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1202 skb->nh.iph->saddr,
1203 sizeof(struct tcphdr), IPPROTO_TCP, 0);
1204 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1205
1206 ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1207
1208 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1209 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1210}
1211
1212
1213
1214
1215
1216static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1217 u32 win, u32 ts)
1218{
1219 struct tcphdr *th = skb->h.th;
1220 struct {
1221 struct tcphdr th;
1222 u32 tsopt[3];
1223 } rep;
1224 struct ip_reply_arg arg;
1225
1226 memset(&rep.th, 0, sizeof(struct tcphdr));
1227 memset(&arg, 0, sizeof arg);
1228
1229 arg.iov[0].iov_base = (unsigned char *)&rep;
1230 arg.iov[0].iov_len = sizeof(rep.th);
1231 if (ts) {
1232 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1233 (TCPOPT_TIMESTAMP << 8) |
1234 TCPOLEN_TIMESTAMP);
1235 rep.tsopt[1] = htonl(tcp_time_stamp);
1236 rep.tsopt[2] = htonl(ts);
1237 arg.iov[0].iov_len = sizeof(rep);
1238 }
1239
1240
1241 rep.th.dest = th->source;
1242 rep.th.source = th->dest;
1243 rep.th.doff = arg.iov[0].iov_len / 4;
1244 rep.th.seq = htonl(seq);
1245 rep.th.ack_seq = htonl(ack);
1246 rep.th.ack = 1;
1247 rep.th.window = htons(win);
1248
1249 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1250 skb->nh.iph->saddr,
1251 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1252 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1253
1254 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
1255
1256 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1257}
1258
1259static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1260{
1261 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1262
1263 tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1264 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1265
1266 tcp_tw_put(tw);
1267}
1268
1269static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
1270{
1271 tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
1272 req->ts_recent);
1273}
1274
1275static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1276 struct open_request *req)
1277{
1278 struct rtable *rt;
1279 struct ip_options *opt = req->af.v4_req.opt;
1280 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1281 .nl_u = { .ip4_u =
1282 { .daddr = ((opt && opt->srr) ?
1283 opt->faddr :
1284 req->af.v4_req.rmt_addr),
1285 .saddr = req->af.v4_req.loc_addr,
1286 .tos = RT_CONN_FLAGS(sk) } },
1287 .proto = IPPROTO_TCP,
1288 .uli_u = { .ports =
1289 { .sport = inet_sk(sk)->sport,
1290 .dport = req->rmt_port } } };
1291
1292 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1293 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1294 return NULL;
1295 }
1296 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1297 ip_rt_put(rt);
1298 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1299 return NULL;
1300 }
1301 return &rt->u.dst;
1302}
1303
1304
1305
1306
1307
1308
1309static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1310 struct dst_entry *dst)
1311{
1312 int err = -1;
1313 struct sk_buff * skb;
1314
1315
1316 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1317 goto out;
1318
1319 skb = tcp_make_synack(sk, dst, req);
1320
1321 if (skb) {
1322 struct tcphdr *th = skb->h.th;
1323
1324 th->check = tcp_v4_check(th, skb->len,
1325 req->af.v4_req.loc_addr,
1326 req->af.v4_req.rmt_addr,
1327 csum_partial((char *)th, skb->len,
1328 skb->csum));
1329
1330 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
1331 req->af.v4_req.rmt_addr,
1332 req->af.v4_req.opt);
1333 if (err == NET_XMIT_CN)
1334 err = 0;
1335 }
1336
1337out:
1338 dst_release(dst);
1339 return err;
1340}
1341
1342
1343
1344
1345static void tcp_v4_or_free(struct open_request *req)
1346{
1347 if (req->af.v4_req.opt)
1348 kfree(req->af.v4_req.opt);
1349}
1350
1351static inline void syn_flood_warning(struct sk_buff *skb)
1352{
1353 static unsigned long warntime;
1354
1355 if (time_after(jiffies, (warntime + HZ * 60))) {
1356 warntime = jiffies;
1357 printk(KERN_INFO
1358 "possible SYN flooding on port %d. Sending cookies.\n",
1359 ntohs(skb->h.th->dest));
1360 }
1361}
1362
1363
1364
1365
1366static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1367 struct sk_buff *skb)
1368{
1369 struct ip_options *opt = &(IPCB(skb)->opt);
1370 struct ip_options *dopt = NULL;
1371
1372 if (opt && opt->optlen) {
1373 int opt_size = optlength(opt);
1374 dopt = kmalloc(opt_size, GFP_ATOMIC);
1375 if (dopt) {
1376 if (ip_options_echo(dopt, skb)) {
1377 kfree(dopt);
1378 dopt = NULL;
1379 }
1380 }
1381 }
1382 return dopt;
1383}
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
/*
 * Consulted by tcp_v4_conn_request(): once the SYN queue has less than a
 * quarter of this value left, SYNs from peers without any recorded history
 * are dropped (when syncookies are off).
 */
int sysctl_max_syn_backlog = 256;

/*
 * Operations for IPv4 open requests; tcp_v4_conn_request() stores a pointer
 * to this table in req->class.
 */
struct or_calltable or_ipv4 = {
	.family = PF_INET,
	.rtx_syn_ack = tcp_v4_send_synack,
	.send_ack = tcp_v4_or_send_ack,
	.destructor = tcp_v4_or_free,
	.send_reset = tcp_v4_send_reset,
};
1407
1408int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1409{
1410 struct tcp_opt tp;
1411 struct open_request *req;
1412 __u32 saddr = skb->nh.iph->saddr;
1413 __u32 daddr = skb->nh.iph->daddr;
1414 __u32 isn = TCP_SKB_CB(skb)->when;
1415 struct dst_entry *dst = NULL;
1416#ifdef CONFIG_SYN_COOKIES
1417 int want_cookie = 0;
1418#else
1419#define want_cookie 0
1420#endif
1421
1422
1423 if (((struct rtable *)skb->dst)->rt_flags &
1424 (RTCF_BROADCAST | RTCF_MULTICAST))
1425 goto drop;
1426
1427
1428
1429
1430
1431 if (tcp_synq_is_full(sk) && !isn) {
1432#ifdef CONFIG_SYN_COOKIES
1433 if (sysctl_tcp_syncookies) {
1434 want_cookie = 1;
1435 } else
1436#endif
1437 goto drop;
1438 }
1439
1440
1441
1442
1443
1444
1445 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1446 goto drop;
1447
1448 req = tcp_openreq_alloc();
1449 if (!req)
1450 goto drop;
1451
1452 tcp_clear_options(&tp);
1453 tp.mss_clamp = 536;
1454 tp.user_mss = tcp_sk(sk)->user_mss;
1455
1456 tcp_parse_options(skb, &tp, 0);
1457
1458 if (want_cookie) {
1459 tcp_clear_options(&tp);
1460 tp.saw_tstamp = 0;
1461 }
1462
1463 if (tp.saw_tstamp && !tp.rcv_tsval) {
1464
1465
1466
1467
1468
1469 tp.saw_tstamp = 0;
1470 tp.tstamp_ok = 0;
1471 }
1472 tp.tstamp_ok = tp.saw_tstamp;
1473
1474 tcp_openreq_init(req, &tp, skb);
1475
1476 req->af.v4_req.loc_addr = daddr;
1477 req->af.v4_req.rmt_addr = saddr;
1478 req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
1479 req->class = &or_ipv4;
1480 if (!want_cookie)
1481 TCP_ECN_create_request(req, skb->h.th);
1482
1483 if (want_cookie) {
1484#ifdef CONFIG_SYN_COOKIES
1485 syn_flood_warning(skb);
1486#endif
1487 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1488 } else if (!isn) {
1489 struct inet_peer *peer = NULL;
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500 if (tp.saw_tstamp &&
1501 sysctl_tcp_tw_recycle &&
1502 (dst = tcp_v4_route_req(sk, req)) != NULL &&
1503 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1504 peer->v4daddr == saddr) {
1505 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1506 (s32)(peer->tcp_ts - req->ts_recent) >
1507 TCP_PAWS_WINDOW) {
1508 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1509 dst_release(dst);
1510 goto drop_and_free;
1511 }
1512 }
1513
1514 else if (!sysctl_tcp_syncookies &&
1515 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
1516 (sysctl_max_syn_backlog >> 2)) &&
1517 (!peer || !peer->tcp_ts_stamp) &&
1518 (!dst || !dst_metric(dst, RTAX_RTT))) {
1519
1520
1521
1522
1523
1524
1525
1526 NETDEBUG(if (net_ratelimit()) \
1527 printk(KERN_DEBUG "TCP: drop open "
1528 "request from %u.%u."
1529 "%u.%u/%u\n", \
1530 NIPQUAD(saddr),
1531 ntohs(skb->h.th->source)));
1532 dst_release(dst);
1533 goto drop_and_free;
1534 }
1535
1536 isn = tcp_v4_init_sequence(sk, skb);
1537 }
1538 req->snt_isn = isn;
1539
1540 if (tcp_v4_send_synack(sk, req, dst))
1541 goto drop_and_free;
1542
1543 if (want_cookie) {
1544 tcp_openreq_free(req);
1545 } else {
1546 tcp_v4_synq_add(sk, req);
1547 }
1548 return 0;
1549
1550drop_and_free:
1551 tcp_openreq_free(req);
1552drop:
1553 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1554 return 0;
1555}
1556
1557
1558
1559
1560
1561
/*
 * Create the child socket for an open request on listening socket @sk.
 *
 * @sk:  listening (parent) socket
 * @skb: segment being processed
 * @req: open request the child is built from
 * @dst: route for the child, or NULL to have tcp_v4_route_req() find one
 *
 * Returns the new socket after it has been hashed, or NULL on failure.
 * Every failure path bumps LINUX_MIB_LISTENDROPS and calls dst_release()
 * on @dst (which may still be NULL at that point); a full accept queue
 * additionally bumps LINUX_MIB_LISTENOVERFLOWS.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct open_request *req,
				  struct dst_entry *dst)
{
	struct inet_opt *newinet;
	struct tcp_opt *newtp;
	struct sock *newsk;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	/* Only look up a route when the caller did not supply one. */
	if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	/* The child keeps dst as its cached route from here on. */
	newsk->sk_dst_cache = dst;
	tcp_v4_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	newinet->daddr	      = req->af.v4_req.rmt_addr;
	newinet->rcv_saddr    = req->af.v4_req.loc_addr;
	newinet->saddr	      = req->af.v4_req.loc_addr;
	/* Hand the saved IP options over to the child and detach them
	 * from the request so they are referenced from one place only.
	 */
	newinet->opt	      = req->af.v4_req.opt;
	req->af.v4_req.opt    = NULL;
	newinet->mc_index     = tcp_v4_iif(skb);
	newinet->mc_ttl	      = skb->nh.iph->ttl;
	newtp->ext_header_len = 0;
	if (newinet->opt)
		newtp->ext_header_len = newinet->opt->optlen;
	newtp->ext2_header_len = dst->header_len;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_sync_mss(newsk, dst_pmtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	__tcp_v4_hash(newsk, 0);
	__tcp_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
	/* An overflow is also counted as a drop: fall into exit. */
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
1614
1615static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1616{
1617 struct tcphdr *th = skb->h.th;
1618 struct iphdr *iph = skb->nh.iph;
1619 struct tcp_opt *tp = tcp_sk(sk);
1620 struct sock *nsk;
1621 struct open_request **prev;
1622
1623 struct open_request *req = tcp_v4_search_req(tp, &prev, th->source,
1624 iph->saddr, iph->daddr);
1625 if (req)
1626 return tcp_check_req(sk, skb, req, prev);
1627
1628 nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
1629 th->source,
1630 skb->nh.iph->daddr,
1631 ntohs(th->dest),
1632 tcp_v4_iif(skb));
1633
1634 if (nsk) {
1635 if (nsk->sk_state != TCP_TIME_WAIT) {
1636 bh_lock_sock(nsk);
1637 return nsk;
1638 }
1639 tcp_tw_put((struct tcp_tw_bucket *)nsk);
1640 return NULL;
1641 }
1642
1643#ifdef CONFIG_SYN_COOKIES
1644 if (!th->rst && !th->syn && th->ack)
1645 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1646#endif
1647 return sk;
1648}
1649
1650static int tcp_v4_checksum_init(struct sk_buff *skb)
1651{
1652 if (skb->ip_summed == CHECKSUM_HW) {
1653 skb->ip_summed = CHECKSUM_UNNECESSARY;
1654 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1655 skb->nh.iph->daddr, skb->csum))
1656 return 0;
1657
1658 NETDEBUG(if (net_ratelimit())
1659 printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
1660 skb->ip_summed = CHECKSUM_NONE;
1661 }
1662 if (skb->len <= 76) {
1663 if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1664 skb->nh.iph->daddr,
1665 skb_checksum(skb, 0, skb->len, 0)))
1666 return -1;
1667 skb->ip_summed = CHECKSUM_UNNECESSARY;
1668 } else {
1669 skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
1670 skb->nh.iph->saddr,
1671 skb->nh.iph->daddr, 0);
1672 }
1673 return 0;
1674}
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
/*
 * Process one segment for socket @sk.
 *
 * Always returns 0.  When a state-processing helper returns non-zero, a
 * reset is sent with tcp_v4_send_reset() and the skb is freed here.  A
 * segment that is shorter than its header length or fails
 * tcp_checksum_complete() is counted in TCP_MIB_INERRS and freed.  On the
 * early "return 0" paths this function does not free the skb itself.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Established sockets are handled first and skip the explicit
	 * length/checksum test below.
	 */
	if (sk->sk_state == TCP_ESTABLISHED) {
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	/* doff counts 32-bit words, hence the shift to get bytes. */
	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* A different socket was selected: let it process the
		 * segment as a child of the listener.
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(skb);
	/* fall through: the skb is freed after the reset is sent */
discard:
	kfree_skb(skb);
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}
1731
1732
1733
1734
1735
/*
 * Entry point for incoming TCP segments.
 *
 * Validates the header, fills in the TCP control block, looks up the owning
 * socket and then either processes the segment (tcp_prequeue() /
 * tcp_v4_do_rcv()) or queues it on the socket backlog when the socket is
 * owned by user context.  TIME_WAIT entries and the "no socket" case are
 * handled at the labels near the end.
 *
 * Returns the value of tcp_v4_do_rcv() when it is called, 0 otherwise.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Counted before validation, so bad segments are included too. */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	/* doff is in 32-bit words; it must cover at least a basic header. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* Only a checksum failure detected by tcp_v4_checksum_init() is
	 * treated as a bad packet here; the skb->len versus doff test is done
	 * on the no_tcp_socket / do_time_wait paths and in tcp_v4_do_rcv().
	 */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb) < 0))
		goto bad_packet;

	/* NOTE(review): th is re-read here, presumably because the pull
	 * above may have moved the header -- confirm.
	 */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* SYN and FIN each add one to the end sequence number. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
			     skb->nh.iph->daddr, ntohs(th->dest),
			     tcp_v4_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	/* Drop the reference held on the looked-up socket. */
	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	/* The bad_packet label sits inside the if-body so that header
	 * errors detected earlier share the INERRS increment and, like a
	 * failed check here, do not trigger a reset.
	 */
	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	/* From here on sk is really a struct tcp_tw_bucket; every exit
	 * that does not hand it on must drop its reference via tcp_tw_put().
	 */
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}
	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					   skb, th, skb->len)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
							  ntohs(th->dest),
							  tcp_v4_iif(skb));
		if (sk2) {
			/* A listener exists: retire the TIME_WAIT entry and
			 * restart processing with the listener.
			 */
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* No listener: fall through and answer with an ACK. */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1861
1862
1863
1864
/*
 * Re-insert @sk into the protocol hash tables by calling the protocol's
 * unhash and then hash operations.  Used in this file after the socket's
 * source address has been changed.
 */
static void __tcp_v4_rehash(struct sock *sk)
{
	sk->sk_prot->unhash(sk);
	sk->sk_prot->hash(sk);
}
1870
1871static int tcp_v4_reselect_saddr(struct sock *sk)
1872{
1873 struct inet_opt *inet = inet_sk(sk);
1874 int err;
1875 struct rtable *rt;
1876 __u32 old_saddr = inet->saddr;
1877 __u32 new_saddr;
1878 __u32 daddr = inet->daddr;
1879
1880 if (inet->opt && inet->opt->srr)
1881 daddr = inet->opt->faddr;
1882
1883
1884 err = ip_route_connect(&rt, daddr, 0,
1885 RT_TOS(inet->tos) | sk->sk_localroute,
1886 sk->sk_bound_dev_if,
1887 IPPROTO_TCP,
1888 inet->sport, inet->dport, sk);
1889 if (err)
1890 return err;
1891
1892 __sk_dst_set(sk, &rt->u.dst);
1893 tcp_v4_setup_caps(sk, &rt->u.dst);
1894 tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1895
1896 new_saddr = rt->rt_src;
1897
1898 if (new_saddr == old_saddr)
1899 return 0;
1900
1901 if (sysctl_ip_dynaddr > 1) {
1902 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
1903 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1904 NIPQUAD(old_saddr),
1905 NIPQUAD(new_saddr));
1906 }
1907
1908 inet->saddr = new_saddr;
1909 inet->rcv_saddr = new_saddr;
1910
1911
1912
1913
1914
1915
1916
1917
1918 __tcp_v4_rehash(sk);
1919 return 0;
1920}
1921
/*
 * Make sure @sk has a usable cached route, looking one up if needed.
 *
 * Returns 0 when __sk_dst_check() still yields a route or a new one was
 * installed.  If the lookup fails, the routing capabilities are cleared
 * and, when sysctl_ip_dynaddr is set, the socket is in SYN_SENT and the
 * local address is not locked by the user, a new source address is tried
 * via tcp_v4_reselect_saddr().  A remaining error is returned and stored
 * (negated) in sk->sk_err_soft.
 */
int tcp_v4_rebuild_header(struct sock *sk)
{
	struct inet_opt *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
	u32 daddr;
	int err;

	/* A route is still cached: nothing to do. */
	if (rt)
		return 0;

	/* With a source-route option the lookup targets the first hop. */
	daddr = inet->daddr;
	if (inet->opt && inet->opt->srr)
		daddr = inet->opt->faddr;

	{
		struct flowi fl = { .oif = sk->sk_bound_dev_if,
				    .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = inet->saddr,
						.tos = RT_CONN_FLAGS(sk) } },
				    .proto = IPPROTO_TCP,
				    .uli_u = { .ports =
					       { .sport = inet->sport,
						 .dport = inet->dport } } };

		err = ip_route_output_flow(&rt, &fl, sk, 0);
	}
	if (!err) {
		__sk_dst_set(sk, &rt->u.dst);
		tcp_v4_setup_caps(sk, &rt->u.dst);
		tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
		return 0;
	}

	/* Routing failed: no route-derived capabilities remain. */
	sk->sk_route_caps = 0;

	/* If any of the first three tests is true, err still holds the
	 * routing error.  Otherwise err is replaced by the result of the
	 * reselect attempt and recorded only when that failed as well.
	 */
	if (!sysctl_ip_dynaddr ||
	    sk->sk_state != TCP_SYN_SENT ||
	    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
	    (err = tcp_v4_reselect_saddr(sk)) != 0)
		sk->sk_err_soft = -err;

	return err;
}
1969
1970static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1971{
1972 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1973 struct inet_opt *inet = inet_sk(sk);
1974
1975 sin->sin_family = AF_INET;
1976 sin->sin_addr.s_addr = inet->daddr;
1977 sin->sin_port = inet->dport;
1978}
1979
1980
1981
1982
1983
1984
1985
1986int tcp_v4_remember_stamp(struct sock *sk)
1987{
1988 struct inet_opt *inet = inet_sk(sk);
1989 struct tcp_opt *tp = tcp_sk(sk);
1990 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1991 struct inet_peer *peer = NULL;
1992 int release_it = 0;
1993
1994 if (!rt || rt->rt_dst != inet->daddr) {
1995 peer = inet_getpeer(inet->daddr, 1);
1996 release_it = 1;
1997 } else {
1998 if (!rt->peer)
1999 rt_bind_peer(rt, 1);
2000 peer = rt->peer;
2001 }
2002
2003 if (peer) {
2004 if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
2005 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2006 peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
2007 peer->tcp_ts_stamp = tp->ts_recent_stamp;
2008 peer->tcp_ts = tp->ts_recent;
2009 }
2010 if (release_it)
2011 inet_putpeer(peer);
2012 return 1;
2013 }
2014
2015 return 0;
2016}
2017
2018int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2019{
2020 struct inet_peer *peer = NULL;
2021
2022 peer = inet_getpeer(tw->tw_daddr, 1);
2023
2024 if (peer) {
2025 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
2026 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2027 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
2028 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
2029 peer->tcp_ts = tw->tw_ts_recent;
2030 }
2031 inet_putpeer(peer);
2032 return 1;
2033 }
2034
2035 return 0;
2036}
2037
/*
 * Address-family specific operations for TCP over IPv4.  Installed as
 * tp->af_specific by tcp_v4_init_sock().
 */
struct tcp_func ipv4_specific = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v4_conn_request,
	.syn_recv_sock	=	tcp_v4_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),
	.setsockopt	=	ip_setsockopt,
	.getsockopt	=	ip_getsockopt,
	.addr2sockaddr	=	v4_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in),
};
2051
2052
2053
2054
/*
 * Initialise the TCP-specific part of a newly created IPv4 socket.
 * Installed as the .init operation of tcp_prot.  Always returns 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct tcp_opt *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	/* Both start from the initial timeout constant. */
	tp->rto  = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* Initial congestion window. */
	tp->snd_cwnd = 2;

	/* Largest positive 32-bit value, i.e. effectively no threshold
	 * to begin with.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sk->sk_use_write_queue = 1;

	tp->af_specific = &ipv4_specific;

	/* Index 1 of the wmem/rmem sysctl arrays supplies the buffer sizes. */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	/* Balanced by the atomic_dec() in tcp_v4_destroy_sock(). */
	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
2096
/*
 * Release the TCP-specific resources of @sk.  Installed as the .destroy
 * operation of tcp_prot.  Always returns 0.
 */
int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_opt *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	/* Drop whatever is still on the write queue. */
	sk_stream_writequeue_purge(sk);

	/* Drop any segments left on the out-of-order queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

	/* Drop any segments left on the prequeue. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Give up the bound port if the socket still references one. */
	if (tp->bind_hash)
		tcp_put_port(sk);

	/* Free the page cached for sendmsg, if there is one. */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* Balances the atomic_inc() in tcp_v4_init_sock(). */
	atomic_dec(&tcp_sockets_allocated);

	return 0;
}
2128
2129EXPORT_SYMBOL(tcp_v4_destroy_sock);
2130
2131#ifdef CONFIG_PROC_FS
2132
2133
2134static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
2135{
2136 return hlist_empty(head) ? NULL :
2137 list_entry(head->first, struct tcp_tw_bucket, tw_node);
2138}
2139
2140static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2141{
2142 return tw->tw_node.next ?
2143 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2144}
2145
2146static void *listening_get_next(struct seq_file *seq, void *cur)
2147{
2148 struct tcp_opt *tp;
2149 struct hlist_node *node;
2150 struct sock *sk = cur;
2151 struct tcp_iter_state* st = seq->private;
2152
2153 if (!sk) {
2154 st->bucket = 0;
2155 sk = sk_head(&tcp_listening_hash[0]);
2156 goto get_sk;
2157 }
2158
2159 ++st->num;
2160
2161 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2162 struct open_request *req = cur;
2163
2164 tp = tcp_sk(st->syn_wait_sk);
2165 req = req->dl_next;
2166 while (1) {
2167 while (req) {
2168 if (req->class->family == st->family) {
2169 cur = req;
2170 goto out;
2171 }
2172 req = req->dl_next;
2173 }
2174 if (++st->sbucket >= TCP_SYNQ_HSIZE)
2175 break;
2176get_req:
2177 req = tp->listen_opt->syn_table[st->sbucket];
2178 }
2179 sk = sk_next(st->syn_wait_sk);
2180 st->state = TCP_SEQ_STATE_LISTENING;
2181 read_unlock_bh(&tp->syn_wait_lock);
2182 } else
2183 sk = sk_next(sk);
2184get_sk:
2185 sk_for_each_from(sk, node) {
2186 if (sk->sk_family == st->family) {
2187 cur = sk;
2188 goto out;
2189 }
2190 tp = tcp_sk(sk);
2191 read_lock_bh(&tp->syn_wait_lock);
2192 if (tp->listen_opt && tp->listen_opt->qlen) {
2193 st->uid = sock_i_uid(sk);
2194 st->syn_wait_sk = sk;
2195 st->state = TCP_SEQ_STATE_OPENREQ;
2196 st->sbucket = 0;
2197 goto get_req;
2198 }
2199 read_unlock_bh(&tp->syn_wait_lock);
2200 }
2201 if (++st->bucket < TCP_LHTABLE_SIZE) {
2202 sk = sk_head(&tcp_listening_hash[st->bucket]);
2203 goto get_sk;
2204 }
2205 cur = NULL;
2206out:
2207 return cur;
2208}
2209
2210static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2211{
2212 void *rc = listening_get_next(seq, NULL);
2213
2214 while (rc && *pos) {
2215 rc = listening_get_next(seq, rc);
2216 --*pos;
2217 }
2218 return rc;
2219}
2220
2221static void *established_get_first(struct seq_file *seq)
2222{
2223 struct tcp_iter_state* st = seq->private;
2224 void *rc = NULL;
2225
2226 for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
2227 struct sock *sk;
2228 struct hlist_node *node;
2229 struct tcp_tw_bucket *tw;
2230
2231 read_lock(&tcp_ehash[st->bucket].lock);
2232 sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
2233 if (sk->sk_family != st->family) {
2234 continue;
2235 }
2236 rc = sk;
2237 goto out;
2238 }
2239 st->state = TCP_SEQ_STATE_TIME_WAIT;
2240 tw_for_each(tw, node,
2241 &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
2242 if (tw->tw_family != st->family) {
2243 continue;
2244 }
2245 rc = tw;
2246 goto out;
2247 }
2248 read_unlock(&tcp_ehash[st->bucket].lock);
2249 st->state = TCP_SEQ_STATE_ESTABLISHED;
2250 }
2251out:
2252 return rc;
2253}
2254
/*
 * Advance the /proc iterator within the established hash.
 *
 * @cur is the element returned previously: a struct sock when st->state is
 * TCP_SEQ_STATE_ESTABLISHED, a struct tcp_tw_bucket when it is
 * TCP_SEQ_STATE_TIME_WAIT.  The read lock of bucket st->bucket is held on
 * entry and is still held (possibly for a later bucket) when a non-NULL
 * element is returned.  Returns NULL, with the lock released, once all
 * buckets have been visited.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct tcp_tw_bucket *tw;
	struct hlist_node *node;
	struct tcp_iter_state* st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Skip TIME_WAIT buckets of other families. */
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/* This bucket is finished: swap its lock for the next one's. */
		read_unlock(&tcp_ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (++st->bucket < tcp_ehash_size) {
			read_lock(&tcp_ehash[st->bucket].lock);
			sk = sk_head(&tcp_ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	/* Socket chain exhausted: continue with the same bucket's
	 * TIME_WAIT chain, which lives tcp_ehash_size entries further on.
	 */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
2300
2301static void *established_get_idx(struct seq_file *seq, loff_t pos)
2302{
2303 void *rc = established_get_first(seq);
2304
2305 while (rc && pos) {
2306 rc = established_get_next(seq, rc);
2307 --pos;
2308 }
2309 return rc;
2310}
2311
2312static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2313{
2314 void *rc;
2315 struct tcp_iter_state* st = seq->private;
2316
2317 tcp_listen_lock();
2318 st->state = TCP_SEQ_STATE_LISTENING;
2319 rc = listening_get_idx(seq, &pos);
2320
2321 if (!rc) {
2322 tcp_listen_unlock();
2323 local_bh_disable();
2324 st->state = TCP_SEQ_STATE_ESTABLISHED;
2325 rc = established_get_idx(seq, pos);
2326 }
2327
2328 return rc;
2329}
2330
2331static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2332{
2333 struct tcp_iter_state* st = seq->private;
2334 st->state = TCP_SEQ_STATE_LISTENING;
2335 st->num = 0;
2336 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2337}
2338
2339static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2340{
2341 void *rc = NULL;
2342 struct tcp_iter_state* st;
2343
2344 if (v == SEQ_START_TOKEN) {
2345 rc = tcp_get_idx(seq, 0);
2346 goto out;
2347 }
2348 st = seq->private;
2349
2350 switch (st->state) {
2351 case TCP_SEQ_STATE_OPENREQ:
2352 case TCP_SEQ_STATE_LISTENING:
2353 rc = listening_get_next(seq, v);
2354 if (!rc) {
2355 tcp_listen_unlock();
2356 local_bh_disable();
2357 st->state = TCP_SEQ_STATE_ESTABLISHED;
2358 rc = established_get_first(seq);
2359 }
2360 break;
2361 case TCP_SEQ_STATE_ESTABLISHED:
2362 case TCP_SEQ_STATE_TIME_WAIT:
2363 rc = established_get_next(seq, v);
2364 break;
2365 }
2366out:
2367 ++*pos;
2368 return rc;
2369}
2370
/*
 * seq_file stop operation: release whatever locks the iterator still
 * holds, as determined by st->state and the last element @v.
 */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state* st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		/* An open request is current, so the SYN-queue lock of the
		 * listener being walked is held.
		 */
		if (v) {
			struct tcp_opt *tp = tcp_sk(st->syn_wait_sk);
			read_unlock_bh(&tp->syn_wait_lock);
		}
		/* fall through: the listen lock has to be dropped as well */
	case TCP_SEQ_STATE_LISTENING:
		/* The listen lock is only taken once iteration goes past
		 * the header token.
		 */
		if (v != SEQ_START_TOKEN)
			tcp_listen_unlock();
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		/* A bucket lock is held only while an element is current. */
		if (v)
			read_unlock(&tcp_ehash[st->bucket].lock);
		local_bh_enable();
		break;
	}
}
2393
2394static int tcp_seq_open(struct inode *inode, struct file *file)
2395{
2396 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2397 struct seq_file *seq;
2398 struct tcp_iter_state *s;
2399 int rc;
2400
2401 if (unlikely(afinfo == NULL))
2402 return -EINVAL;
2403
2404 s = kmalloc(sizeof(*s), GFP_KERNEL);
2405 if (!s)
2406 return -ENOMEM;
2407 memset(s, 0, sizeof(*s));
2408 s->family = afinfo->family;
2409 s->seq_ops.start = tcp_seq_start;
2410 s->seq_ops.next = tcp_seq_next;
2411 s->seq_ops.show = afinfo->seq_show;
2412 s->seq_ops.stop = tcp_seq_stop;
2413
2414 rc = seq_open(file, &s->seq_ops);
2415 if (rc)
2416 goto out_kfree;
2417 seq = file->private_data;
2418 seq->private = s;
2419out:
2420 return rc;
2421out_kfree:
2422 kfree(s);
2423 goto out;
2424}
2425
2426int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2427{
2428 int rc = 0;
2429 struct proc_dir_entry *p;
2430
2431 if (!afinfo)
2432 return -EINVAL;
2433 afinfo->seq_fops->owner = afinfo->owner;
2434 afinfo->seq_fops->open = tcp_seq_open;
2435 afinfo->seq_fops->read = seq_read;
2436 afinfo->seq_fops->llseek = seq_lseek;
2437 afinfo->seq_fops->release = seq_release_private;
2438
2439 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2440 if (p)
2441 p->data = afinfo;
2442 else
2443 rc = -ENOMEM;
2444 return rc;
2445}
2446
2447void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2448{
2449 if (!afinfo)
2450 return;
2451 proc_net_remove(afinfo->name);
2452 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2453}
2454
/*
 * Format the /proc line for open request @req of listening socket @sk
 * into @tmpbuf.
 *
 * @i:   entry number printed in the first column
 * @uid: user id to print
 *
 * The state column is always TCP_SYN_RECV and several columns are fixed
 * constants.  sprintf() is unbounded, so @tmpbuf must be large enough for
 * the whole line.
 */
static void get_openreq4(struct sock *sk, struct open_request *req,
			 char *tmpbuf, int i, int uid)
{
	/* Time left until the request expires, in jiffies. */
	int ttd = req->expires - jiffies;

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
		i,
		req->af.v4_req.loc_addr,
		ntohs(inet_sk(sk)->sport),
		req->af.v4_req.rmt_addr,
		ntohs(req->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* queue columns: fixed at zero for a request */
		1,    /* timer column: fixed value */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* fixed zero column */
		0,  /* fixed zero column */
		atomic_read(&sk->sk_refcnt),
		req);
}
2478
/*
 * Format the /proc line for socket @sp into @tmpbuf.
 *
 * @i: entry number printed in the first column
 *
 * sprintf() is unbounded, so @tmpbuf must be large enough for the whole
 * line.
 */
static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_opt *tp = tcp_sk(sp);
	struct inet_opt *inet = inet_sk(sp);
	unsigned int dest = inet->daddr;
	unsigned int src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	/* Timer code reported: 1 = retransmit, 4 = zero-window probe,
	 * 2 = sk_timer pending, 0 = none (expiry then reads as "now").
	 */
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->sk_state,
		tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		tp->retransmits,
		sock_i_uid(sp),
		tp->probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
		tp->snd_cwnd,
		/* Thresholds of 0xFFFF and above are printed as -1. */
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}
2519
/*
 * Format the /proc line for TIME_WAIT bucket @tw into @tmpbuf.
 *
 * @i: entry number printed in the first column
 *
 * The state column shows tw->tw_substate and the timer column is the fixed
 * value 3.  sprintf() is unbounded, so @tmpbuf must be large enough for
 * the whole line.
 */
static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
{
	unsigned int dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	/* Never report a negative remaining time. */
	if (ttd < 0)
		ttd = 0;

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}
2540
2541#define TMPSZ 150
2542
2543static int tcp4_seq_show(struct seq_file *seq, void *v)
2544{
2545 struct tcp_iter_state* st;
2546 char tmpbuf[TMPSZ + 1];
2547
2548 if (v == SEQ_START_TOKEN) {
2549 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2550 " sl local_address rem_address st tx_queue "
2551 "rx_queue tr tm->when retrnsmt uid timeout "
2552 "inode");
2553 goto out;
2554 }
2555 st = seq->private;
2556
2557 switch (st->state) {
2558 case TCP_SEQ_STATE_LISTENING:
2559 case TCP_SEQ_STATE_ESTABLISHED:
2560 get_tcp4_sock(v, tmpbuf, st->num);
2561 break;
2562 case TCP_SEQ_STATE_OPENREQ:
2563 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2564 break;
2565 case TCP_SEQ_STATE_TIME_WAIT:
2566 get_timewait4_sock(v, tmpbuf, st->num);
2567 break;
2568 }
2569 seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2570out:
2571 return 0;
2572}
2573
/* Filled in by tcp_proc_register(); cleared by tcp_proc_unregister(). */
static struct file_operations tcp4_seq_fops;

/* Description of the IPv4 "tcp" /proc file. */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};
2582
/*
 * Register the IPv4 TCP /proc file.  Returns the result of
 * tcp_proc_register().
 */
int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}
2587
/* Remove the IPv4 TCP /proc file again. */
void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
2592#endif
2593
/*
 * Protocol operations table for TCP sockets: socket-level entry points,
 * hashing hooks and the memory-accounting variables and sysctl arrays.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	/* Segments queued on the socket backlog are processed by the same
	 * routine as directly received ones.
	 */
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
};
2621
2622
2623
2624void __init tcp_v4_init(struct net_proto_family *ops)
2625{
2626 int err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
2627 if (err < 0)
2628 panic("Failed to create the TCP control socket.\n");
2629 tcp_socket->sk->sk_allocation = GFP_ATOMIC;
2630 inet_sk(tcp_socket->sk)->uc_ttl = -1;
2631
2632
2633
2634
2635
2636 tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
2637}
2638
/* Symbols made available to other kernel code. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_bind_hash);
EXPORT_SYMBOL(tcp_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_inherit_port);
EXPORT_SYMBOL(tcp_listen_wlock);
EXPORT_SYMBOL(tcp_port_rover);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_put_port);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_lookup_listener);
EXPORT_SYMBOL(tcp_v4_rebuild_header);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/* The /proc registration helpers exist only with procfs support. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
/* Tunables exported only when sysctl support is configured. */
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_max_syn_backlog);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
#endif
2667