1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55#include <linux/config.h>
56
57#include <linux/types.h>
58#include <linux/fcntl.h>
59#include <linux/module.h>
60#include <linux/random.h>
61#include <linux/cache.h>
62#include <linux/jhash.h>
63#include <linux/init.h>
64#include <linux/times.h>
65
66#include <net/icmp.h>
67#include <net/tcp.h>
68#include <net/ipv6.h>
69#include <net/inet_common.h>
70#include <net/xfrm.h>
71
72#include <linux/inet.h>
73#include <linux/ipv6.h>
74#include <linux/stddef.h>
75#include <linux/proc_fs.h>
76#include <linux/seq_file.h>
77
/* Defined elsewhere; only declared here. */
extern int sysctl_ip_dynaddr;

/*
 * Tunables defined by this file.  sysctl_tcp_tw_reuse is consulted by
 * __tcp_v4_check_established() when deciding whether a TIME-WAIT entry
 * may be taken over by a new outgoing connection.
 */
int sysctl_tcp_tw_reuse;
int sysctl_tcp_low_latency;

/*
 * NOTE(review): presumably the number of bytes of the offending transport
 * header that must follow the quoted IP header inside an ICMP error --
 * confirm against tcp_v4_err().
 */
#define ICMP_MIN_LENGTH 8

/* Socket whose ->sk is handed to ip_send_reply() for RST/ACK replies. */
static struct socket *tcp_socket;

/* Forward declaration; the definition appears further down in this file. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
		       struct sk_buff *skb);
90
91struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
92 .__tcp_lhash_lock = RW_LOCK_UNLOCKED,
93 .__tcp_lhash_users = ATOMIC_INIT(0),
94 .__tcp_lhash_wait
95 = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait),
96 .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED
97};
98
99
100
101
102
103
/* Inclusive { low, high } range searched when a local port is auto-selected. */
int sysctl_local_port_range[2] = { 1024, 4999 };

/*
 * Last port tried by the automatic port search.  The search pre-increments
 * this value, so starting one below 1024 makes 1024 the first candidate.
 */
int tcp_port_rover = 1024 - 1;
106
107static __inline__ int tcp_hashfn(__u32 laddr, __u16 lport,
108 __u32 faddr, __u16 fport)
109{
110 int h = (laddr ^ lport) ^ (faddr ^ fport);
111 h ^= h >> 16;
112 h ^= h >> 8;
113 return h & (tcp_ehash_size - 1);
114}
115
116static __inline__ int tcp_sk_hashfn(struct sock *sk)
117{
118 struct inet_opt *inet = inet_sk(sk);
119 __u32 laddr = inet->rcv_saddr;
120 __u16 lport = inet->num;
121 __u32 faddr = inet->daddr;
122 __u16 fport = inet->dport;
123
124 return tcp_hashfn(laddr, lport, faddr, fport);
125}
126
127
128
129
130struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
131 unsigned short snum)
132{
133 struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep,
134 SLAB_ATOMIC);
135 if (tb) {
136 tb->port = snum;
137 tb->fastreuse = 0;
138 INIT_HLIST_HEAD(&tb->owners);
139 hlist_add_head(&tb->node, &head->chain);
140 }
141 return tb;
142}
143
144
145void tcp_bucket_destroy(struct tcp_bind_bucket *tb)
146{
147 if (hlist_empty(&tb->owners)) {
148 __hlist_del(&tb->node);
149 kmem_cache_free(tcp_bucket_cachep, tb);
150 }
151}
152
153
154static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
155{
156 struct tcp_bind_hashbucket *head =
157 &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)];
158 struct tcp_bind_bucket *tb;
159
160 spin_lock(&head->lock);
161 tb = tcp_sk(sk)->bind_hash;
162 sk_add_bind_node(child, &tb->owners);
163 tcp_sk(child)->bind_hash = tb;
164 spin_unlock(&head->lock);
165}
166
/*
 * Wrapper around __tcp_inherit_port() that disables bottom halves for
 * the duration of the call.
 */
inline void tcp_inherit_port(struct sock *sk, struct sock *child)
{
	local_bh_disable();
	__tcp_inherit_port(sk, child);
	local_bh_enable();
}
173
174void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
175 unsigned short snum)
176{
177 inet_sk(sk)->num = snum;
178 sk_add_bind_node(sk, &tb->owners);
179 tcp_sk(sk)->bind_hash = tb;
180}
181
182static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb)
183{
184 const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk);
185 struct sock *sk2;
186 struct hlist_node *node;
187 int reuse = sk->sk_reuse;
188
189 sk_for_each_bound(sk2, node, &tb->owners) {
190 if (sk != sk2 &&
191 !tcp_v6_ipv6only(sk2) &&
192 (!sk->sk_bound_dev_if ||
193 !sk2->sk_bound_dev_if ||
194 sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
195 if (!reuse || !sk2->sk_reuse ||
196 sk2->sk_state == TCP_LISTEN) {
197 const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2);
198 if (!sk2_rcv_saddr || !sk_rcv_saddr ||
199 sk2_rcv_saddr == sk_rcv_saddr)
200 break;
201 }
202 }
203 }
204 return node != NULL;
205}
206
207
208
209
210static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
211{
212 struct tcp_bind_hashbucket *head;
213 struct hlist_node *node;
214 struct tcp_bind_bucket *tb;
215 int ret;
216
217 local_bh_disable();
218 if (!snum) {
219 int low = sysctl_local_port_range[0];
220 int high = sysctl_local_port_range[1];
221 int remaining = (high - low) + 1;
222 int rover;
223
224 spin_lock(&tcp_portalloc_lock);
225 rover = tcp_port_rover;
226 do {
227 rover++;
228 if (rover < low || rover > high)
229 rover = low;
230 head = &tcp_bhash[tcp_bhashfn(rover)];
231 spin_lock(&head->lock);
232 tb_for_each(tb, node, &head->chain)
233 if (tb->port == rover)
234 goto next;
235 break;
236 next:
237 spin_unlock(&head->lock);
238 } while (--remaining > 0);
239 tcp_port_rover = rover;
240 spin_unlock(&tcp_portalloc_lock);
241
242
243 ret = 1;
244 if (remaining <= 0)
245 goto fail;
246
247
248
249
250 snum = rover;
251 } else {
252 head = &tcp_bhash[tcp_bhashfn(snum)];
253 spin_lock(&head->lock);
254 tb_for_each(tb, node, &head->chain)
255 if (tb->port == snum)
256 goto tb_found;
257 }
258 tb = NULL;
259 goto tb_not_found;
260tb_found:
261 if (!hlist_empty(&tb->owners)) {
262 if (sk->sk_reuse > 1)
263 goto success;
264 if (tb->fastreuse > 0 &&
265 sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
266 goto success;
267 } else {
268 ret = 1;
269 if (tcp_bind_conflict(sk, tb))
270 goto fail_unlock;
271 }
272 }
273tb_not_found:
274 ret = 1;
275 if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
276 goto fail_unlock;
277 if (hlist_empty(&tb->owners)) {
278 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
279 tb->fastreuse = 1;
280 else
281 tb->fastreuse = 0;
282 } else if (tb->fastreuse &&
283 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
284 tb->fastreuse = 0;
285success:
286 if (!tcp_sk(sk)->bind_hash)
287 tcp_bind_hash(sk, tb, snum);
288 BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
289 ret = 0;
290
291fail_unlock:
292 spin_unlock(&head->lock);
293fail:
294 local_bh_enable();
295 return ret;
296}
297
298
299
300
301static void __tcp_put_port(struct sock *sk)
302{
303 struct inet_opt *inet = inet_sk(sk);
304 struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)];
305 struct tcp_bind_bucket *tb;
306
307 spin_lock(&head->lock);
308 tb = tcp_sk(sk)->bind_hash;
309 __sk_del_bind_node(sk);
310 tcp_sk(sk)->bind_hash = NULL;
311 inet->num = 0;
312 tcp_bucket_destroy(tb);
313 spin_unlock(&head->lock);
314}
315
/*
 * Release @sk's local port with bottom halves disabled around the
 * actual work done by __tcp_put_port().
 */
void tcp_put_port(struct sock *sk)
{
	local_bh_disable();
	__tcp_put_port(sk);
	local_bh_enable();
}
322
323
324
325
326
327
328
329
330void tcp_listen_wlock(void)
331{
332 write_lock(&tcp_lhash_lock);
333
334 if (atomic_read(&tcp_lhash_users)) {
335 DEFINE_WAIT(wait);
336
337 for (;;) {
338 prepare_to_wait_exclusive(&tcp_lhash_wait,
339 &wait, TASK_UNINTERRUPTIBLE);
340 if (!atomic_read(&tcp_lhash_users))
341 break;
342 write_unlock_bh(&tcp_lhash_lock);
343 schedule();
344 write_lock_bh(&tcp_lhash_lock);
345 }
346
347 finish_wait(&tcp_lhash_wait, &wait);
348 }
349}
350
351static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
352{
353 struct hlist_head *list;
354 rwlock_t *lock;
355
356 BUG_TRAP(sk_unhashed(sk));
357 if (listen_possible && sk->sk_state == TCP_LISTEN) {
358 list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
359 lock = &tcp_lhash_lock;
360 tcp_listen_wlock();
361 } else {
362 list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain;
363 lock = &tcp_ehash[sk->sk_hashent].lock;
364 write_lock(lock);
365 }
366 __sk_add_node(sk, list);
367 sock_prot_inc_use(sk->sk_prot);
368 write_unlock(lock);
369 if (listen_possible && sk->sk_state == TCP_LISTEN)
370 wake_up(&tcp_lhash_wait);
371}
372
373static void tcp_v4_hash(struct sock *sk)
374{
375 if (sk->sk_state != TCP_CLOSE) {
376 local_bh_disable();
377 __tcp_v4_hash(sk, 1);
378 local_bh_enable();
379 }
380}
381
382void tcp_unhash(struct sock *sk)
383{
384 rwlock_t *lock;
385
386 if (sk_unhashed(sk))
387 goto ende;
388
389 if (sk->sk_state == TCP_LISTEN) {
390 local_bh_disable();
391 tcp_listen_wlock();
392 lock = &tcp_lhash_lock;
393 } else {
394 struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent];
395 lock = &head->lock;
396 write_lock_bh(&head->lock);
397 }
398
399 if (__sk_del_node_init(sk))
400 sock_prot_dec_use(sk->sk_prot);
401 write_unlock_bh(lock);
402
403 ende:
404 if (sk->sk_state == TCP_LISTEN)
405 wake_up(&tcp_lhash_wait);
406}
407
408
409
410
411
412
413
414static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr,
415 unsigned short hnum, int dif)
416{
417 struct sock *result = NULL, *sk;
418 struct hlist_node *node;
419 int score, hiscore;
420
421 hiscore=-1;
422 sk_for_each(sk, node, head) {
423 struct inet_opt *inet = inet_sk(sk);
424
425 if (inet->num == hnum && !ipv6_only_sock(sk)) {
426 __u32 rcv_saddr = inet->rcv_saddr;
427
428 score = (sk->sk_family == PF_INET ? 1 : 0);
429 if (rcv_saddr) {
430 if (rcv_saddr != daddr)
431 continue;
432 score+=2;
433 }
434 if (sk->sk_bound_dev_if) {
435 if (sk->sk_bound_dev_if != dif)
436 continue;
437 score+=2;
438 }
439 if (score == 5)
440 return sk;
441 if (score > hiscore) {
442 hiscore = score;
443 result = sk;
444 }
445 }
446 }
447 return result;
448}
449
450
451inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum,
452 int dif)
453{
454 struct sock *sk = NULL;
455 struct hlist_head *head;
456
457 read_lock(&tcp_lhash_lock);
458 head = &tcp_listening_hash[tcp_lhashfn(hnum)];
459 if (!hlist_empty(head)) {
460 struct inet_opt *inet = inet_sk((sk = __sk_head(head)));
461
462 if (inet->num == hnum && !sk->sk_node.next &&
463 (!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
464 (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
465 !sk->sk_bound_dev_if)
466 goto sherry_cache;
467 sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif);
468 }
469 if (sk) {
470sherry_cache:
471 sock_hold(sk);
472 }
473 read_unlock(&tcp_lhash_lock);
474 return sk;
475}
476
477
478
479
480
481
482
483static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport,
484 u32 daddr, u16 hnum,
485 int dif)
486{
487 struct tcp_ehash_bucket *head;
488 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
489 __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
490 struct sock *sk;
491 struct hlist_node *node;
492
493
494
495 int hash = tcp_hashfn(daddr, hnum, saddr, sport);
496 head = &tcp_ehash[hash];
497 read_lock(&head->lock);
498 sk_for_each(sk, node, &head->chain) {
499 if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif))
500 goto hit;
501 }
502
503
504 sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
505 if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
506 goto hit;
507 }
508 sk = NULL;
509out:
510 read_unlock(&head->lock);
511 return sk;
512hit:
513 sock_hold(sk);
514 goto out;
515}
516
517static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
518 u32 daddr, u16 hnum, int dif)
519{
520 struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
521 daddr, hnum, dif);
522
523 return sk ? : tcp_v4_lookup_listener(daddr, hnum, dif);
524}
525
526inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
527 u16 dport, int dif)
528{
529 struct sock *sk;
530
531 local_bh_disable();
532 sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
533 local_bh_enable();
534
535 return sk;
536}
537
538static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
539{
540 return secure_tcp_sequence_number(skb->nh.iph->daddr,
541 skb->nh.iph->saddr,
542 skb->h.th->dest,
543 skb->h.th->source);
544}
545
546
547static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
548 struct tcp_tw_bucket **twp)
549{
550 struct inet_opt *inet = inet_sk(sk);
551 u32 daddr = inet->rcv_saddr;
552 u32 saddr = inet->daddr;
553 int dif = sk->sk_bound_dev_if;
554 TCP_V4_ADDR_COOKIE(acookie, saddr, daddr)
555 __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
556 int hash = tcp_hashfn(daddr, lport, saddr, inet->dport);
557 struct tcp_ehash_bucket *head = &tcp_ehash[hash];
558 struct sock *sk2;
559 struct hlist_node *node;
560 struct tcp_tw_bucket *tw;
561
562 write_lock(&head->lock);
563
564
565 sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
566 tw = (struct tcp_tw_bucket *)sk2;
567
568 if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) {
569 struct tcp_opt *tp = tcp_sk(sk);
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585 if (tw->tw_ts_recent_stamp &&
586 (!twp || (sysctl_tcp_tw_reuse &&
587 xtime.tv_sec -
588 tw->tw_ts_recent_stamp > 1))) {
589 if ((tp->write_seq =
590 tw->tw_snd_nxt + 65535 + 2) == 0)
591 tp->write_seq = 1;
592 tp->ts_recent = tw->tw_ts_recent;
593 tp->ts_recent_stamp = tw->tw_ts_recent_stamp;
594 sock_hold(sk2);
595 goto unique;
596 } else
597 goto not_unique;
598 }
599 }
600 tw = NULL;
601
602
603 sk_for_each(sk2, node, &head->chain) {
604 if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif))
605 goto not_unique;
606 }
607
608unique:
609
610
611 inet->num = lport;
612 inet->sport = htons(lport);
613 sk->sk_hashent = hash;
614 BUG_TRAP(sk_unhashed(sk));
615 __sk_add_node(sk, &head->chain);
616 sock_prot_inc_use(sk->sk_prot);
617 write_unlock(&head->lock);
618
619 if (twp) {
620 *twp = tw;
621 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
622 } else if (tw) {
623
624 tcp_tw_deschedule(tw);
625 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
626
627 tcp_tw_put(tw);
628 }
629
630 return 0;
631
632not_unique:
633 write_unlock(&head->lock);
634 return -EADDRNOTAVAIL;
635}
636
637
638
639
640static int tcp_v4_hash_connect(struct sock *sk)
641{
642 unsigned short snum = inet_sk(sk)->num;
643 struct tcp_bind_hashbucket *head;
644 struct tcp_bind_bucket *tb;
645 int ret;
646
647 if (!snum) {
648 int rover;
649 int low = sysctl_local_port_range[0];
650 int high = sysctl_local_port_range[1];
651 int remaining = (high - low) + 1;
652 struct hlist_node *node;
653 struct tcp_tw_bucket *tw = NULL;
654
655 local_bh_disable();
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670 spin_lock(&tcp_portalloc_lock);
671 rover = tcp_port_rover;
672
673 do {
674 rover++;
675 if ((rover < low) || (rover > high))
676 rover = low;
677 head = &tcp_bhash[tcp_bhashfn(rover)];
678 spin_lock(&head->lock);
679
680
681
682
683
684 tb_for_each(tb, node, &head->chain) {
685 if (tb->port == rover) {
686 BUG_TRAP(!hlist_empty(&tb->owners));
687 if (tb->fastreuse >= 0)
688 goto next_port;
689 if (!__tcp_v4_check_established(sk,
690 rover,
691 &tw))
692 goto ok;
693 goto next_port;
694 }
695 }
696
697 tb = tcp_bucket_create(head, rover);
698 if (!tb) {
699 spin_unlock(&head->lock);
700 break;
701 }
702 tb->fastreuse = -1;
703 goto ok;
704
705 next_port:
706 spin_unlock(&head->lock);
707 } while (--remaining > 0);
708 tcp_port_rover = rover;
709 spin_unlock(&tcp_portalloc_lock);
710
711 local_bh_enable();
712
713 return -EADDRNOTAVAIL;
714
715ok:
716
717 tcp_port_rover = rover;
718 spin_unlock(&tcp_portalloc_lock);
719
720 tcp_bind_hash(sk, tb, rover);
721 if (sk_unhashed(sk)) {
722 inet_sk(sk)->sport = htons(rover);
723 __tcp_v4_hash(sk, 0);
724 }
725 spin_unlock(&head->lock);
726
727 if (tw) {
728 tcp_tw_deschedule(tw);
729 tcp_tw_put(tw);
730 }
731
732 ret = 0;
733 goto out;
734 }
735
736 head = &tcp_bhash[tcp_bhashfn(snum)];
737 tb = tcp_sk(sk)->bind_hash;
738 spin_lock_bh(&head->lock);
739 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
740 __tcp_v4_hash(sk, 0);
741 spin_unlock_bh(&head->lock);
742 return 0;
743 } else {
744 spin_unlock(&head->lock);
745
746 ret = __tcp_v4_check_established(sk, snum, NULL);
747out:
748 local_bh_enable();
749 return ret;
750 }
751}
752
753
754int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
755{
756 struct inet_opt *inet = inet_sk(sk);
757 struct tcp_opt *tp = tcp_sk(sk);
758 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
759 struct rtable *rt;
760 u32 daddr, nexthop;
761 int tmp;
762 int err;
763
764 if (addr_len < sizeof(struct sockaddr_in))
765 return -EINVAL;
766
767 if (usin->sin_family != AF_INET)
768 return -EAFNOSUPPORT;
769
770 nexthop = daddr = usin->sin_addr.s_addr;
771 if (inet->opt && inet->opt->srr) {
772 if (!daddr)
773 return -EINVAL;
774 nexthop = inet->opt->faddr;
775 }
776
777 tmp = ip_route_connect(&rt, nexthop, inet->saddr,
778 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
779 IPPROTO_TCP,
780 inet->sport, usin->sin_port, sk);
781 if (tmp < 0)
782 return tmp;
783
784 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
785 ip_rt_put(rt);
786 return -ENETUNREACH;
787 }
788
789 if (!inet->opt || !inet->opt->srr)
790 daddr = rt->rt_dst;
791
792 if (!inet->saddr)
793 inet->saddr = rt->rt_src;
794 inet->rcv_saddr = inet->saddr;
795
796 if (tp->ts_recent_stamp && inet->daddr != daddr) {
797
798 tp->ts_recent = 0;
799 tp->ts_recent_stamp = 0;
800 tp->write_seq = 0;
801 }
802
803 if (sysctl_tcp_tw_recycle &&
804 !tp->ts_recent_stamp && rt->rt_dst == daddr) {
805 struct inet_peer *peer = rt_get_peer(rt);
806
807
808
809
810
811
812 if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
813 tp->ts_recent_stamp = peer->tcp_ts_stamp;
814 tp->ts_recent = peer->tcp_ts;
815 }
816 }
817
818 inet->dport = usin->sin_port;
819 inet->daddr = daddr;
820
821 tp->ext_header_len = 0;
822 if (inet->opt)
823 tp->ext_header_len = inet->opt->optlen;
824
825 tp->mss_clamp = 536;
826
827
828
829
830
831
832 tcp_set_state(sk, TCP_SYN_SENT);
833 err = tcp_v4_hash_connect(sk);
834 if (err)
835 goto failure;
836
837 err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
838 if (err)
839 goto failure;
840
841
842 __sk_dst_set(sk, &rt->u.dst);
843 tcp_v4_setup_caps(sk, &rt->u.dst);
844 tp->ext2_header_len = rt->u.dst.header_len;
845
846 if (!tp->write_seq)
847 tp->write_seq = secure_tcp_sequence_number(inet->saddr,
848 inet->daddr,
849 inet->sport,
850 usin->sin_port);
851
852 inet->id = tp->write_seq ^ jiffies;
853
854 err = tcp_connect(sk);
855 rt = NULL;
856 if (err)
857 goto failure;
858
859 return 0;
860
861failure:
862
863 tcp_set_state(sk, TCP_CLOSE);
864 ip_rt_put(rt);
865 sk->sk_route_caps = 0;
866 inet->dport = 0;
867 return err;
868}
869
870static __inline__ int tcp_v4_iif(struct sk_buff *skb)
871{
872 return ((struct rtable *)skb->dst)->rt_iif;
873}
874
875static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd)
876{
877 return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1));
878}
879
880static struct open_request *tcp_v4_search_req(struct tcp_opt *tp,
881 struct open_request ***prevp,
882 __u16 rport,
883 __u32 raddr, __u32 laddr)
884{
885 struct tcp_listen_opt *lopt = tp->listen_opt;
886 struct open_request *req, **prev;
887
888 for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)];
889 (req = *prev) != NULL;
890 prev = &req->dl_next) {
891 if (req->rmt_port == rport &&
892 req->af.v4_req.rmt_addr == raddr &&
893 req->af.v4_req.loc_addr == laddr &&
894 TCP_INET_FAMILY(req->class->family)) {
895 BUG_TRAP(!req->sk);
896 *prevp = prev;
897 break;
898 }
899 }
900
901 return req;
902}
903
904static void tcp_v4_synq_add(struct sock *sk, struct open_request *req)
905{
906 struct tcp_opt *tp = tcp_sk(sk);
907 struct tcp_listen_opt *lopt = tp->listen_opt;
908 u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd);
909
910 req->expires = jiffies + TCP_TIMEOUT_INIT;
911 req->retrans = 0;
912 req->sk = NULL;
913 req->dl_next = lopt->syn_table[h];
914
915 write_lock(&tp->syn_wait_lock);
916 lopt->syn_table[h] = req;
917 write_unlock(&tp->syn_wait_lock);
918
919 tcp_synq_added(sk);
920}
921
922
923
924
925
926static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
927 u32 mtu)
928{
929 struct dst_entry *dst;
930 struct inet_opt *inet = inet_sk(sk);
931 struct tcp_opt *tp = tcp_sk(sk);
932
933
934
935
936
937 if (sk->sk_state == TCP_LISTEN)
938 return;
939
940
941
942
943
944
945
946 if ((dst = __sk_dst_check(sk, 0)) == NULL)
947 return;
948
949 dst->ops->update_pmtu(dst, mtu);
950
951
952
953
954 if (mtu < dst_pmtu(dst) && ip_dont_fragment(sk, dst))
955 sk->sk_err_soft = EMSGSIZE;
956
957 mtu = dst_pmtu(dst);
958
959 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
960 tp->pmtu_cookie > mtu) {
961 tcp_sync_mss(sk, mtu);
962
963
964
965
966
967
968 tcp_simple_retransmit(sk);
969 }
970}
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988void tcp_v4_err(struct sk_buff *skb, u32 info)
989{
990 struct iphdr *iph = (struct iphdr *)skb->data;
991 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
992 struct tcp_opt *tp;
993 struct inet_opt *inet;
994 int type = skb->h.icmph->type;
995 int code = skb->h.icmph->code;
996 struct sock *sk;
997 __u32 seq;
998 int err;
999
1000 if (skb->len < (iph->ihl << 2) + 8) {
1001 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
1002 return;
1003 }
1004
1005 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
1006 th->source, tcp_v4_iif(skb));
1007 if (!sk) {
1008 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
1009 return;
1010 }
1011 if (sk->sk_state == TCP_TIME_WAIT) {
1012 tcp_tw_put((struct tcp_tw_bucket *)sk);
1013 return;
1014 }
1015
1016 bh_lock_sock(sk);
1017
1018
1019
1020 if (sock_owned_by_user(sk))
1021 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
1022
1023 if (sk->sk_state == TCP_CLOSE)
1024 goto out;
1025
1026 tp = tcp_sk(sk);
1027 seq = ntohl(th->seq);
1028 if (sk->sk_state != TCP_LISTEN &&
1029 !between(seq, tp->snd_una, tp->snd_nxt)) {
1030 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
1031 goto out;
1032 }
1033
1034 switch (type) {
1035 case ICMP_SOURCE_QUENCH:
1036
1037 goto out;
1038 case ICMP_PARAMETERPROB:
1039 err = EPROTO;
1040 break;
1041 case ICMP_DEST_UNREACH:
1042 if (code > NR_ICMP_UNREACH)
1043 goto out;
1044
1045 if (code == ICMP_FRAG_NEEDED) {
1046 if (!sock_owned_by_user(sk))
1047 do_pmtu_discovery(sk, iph, info);
1048 goto out;
1049 }
1050
1051 err = icmp_err_convert[code].errno;
1052 break;
1053 case ICMP_TIME_EXCEEDED:
1054 err = EHOSTUNREACH;
1055 break;
1056 default:
1057 goto out;
1058 }
1059
1060 switch (sk->sk_state) {
1061 struct open_request *req, **prev;
1062 case TCP_LISTEN:
1063 if (sock_owned_by_user(sk))
1064 goto out;
1065
1066 req = tcp_v4_search_req(tp, &prev, th->dest,
1067 iph->daddr, iph->saddr);
1068 if (!req)
1069 goto out;
1070
1071
1072
1073
1074 BUG_TRAP(!req->sk);
1075
1076 if (seq != req->snt_isn) {
1077 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
1078 goto out;
1079 }
1080
1081
1082
1083
1084
1085
1086
1087 tcp_synq_drop(sk, req, prev);
1088 goto out;
1089
1090 case TCP_SYN_SENT:
1091 case TCP_SYN_RECV:
1092
1093
1094 if (!sock_owned_by_user(sk)) {
1095 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1096 sk->sk_err = err;
1097
1098 sk->sk_error_report(sk);
1099
1100 tcp_done(sk);
1101 } else {
1102 sk->sk_err_soft = err;
1103 }
1104 goto out;
1105 }
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123 inet = inet_sk(sk);
1124 if (!sock_owned_by_user(sk) && inet->recverr) {
1125 sk->sk_err = err;
1126 sk->sk_error_report(sk);
1127 } else {
1128 sk->sk_err_soft = err;
1129 }
1130
1131out:
1132 bh_unlock_sock(sk);
1133 sock_put(sk);
1134}
1135
1136
1137void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
1138 struct sk_buff *skb)
1139{
1140 struct inet_opt *inet = inet_sk(sk);
1141
1142 if (skb->ip_summed == CHECKSUM_HW) {
1143 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
1144 skb->csum = offsetof(struct tcphdr, check);
1145 } else {
1146 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
1147 csum_partial((char *)th,
1148 th->doff << 2,
1149 skb->csum));
1150 }
1151}
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166static void tcp_v4_send_reset(struct sk_buff *skb)
1167{
1168 struct tcphdr *th = skb->h.th;
1169 struct tcphdr rth;
1170 struct ip_reply_arg arg;
1171
1172
1173 if (th->rst)
1174 return;
1175
1176 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
1177 return;
1178
1179
1180 memset(&rth, 0, sizeof(struct tcphdr));
1181 rth.dest = th->source;
1182 rth.source = th->dest;
1183 rth.doff = sizeof(struct tcphdr) / 4;
1184 rth.rst = 1;
1185
1186 if (th->ack) {
1187 rth.seq = th->ack_seq;
1188 } else {
1189 rth.ack = 1;
1190 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
1191 skb->len - (th->doff << 2));
1192 }
1193
1194 memset(&arg, 0, sizeof arg);
1195 arg.iov[0].iov_base = (unsigned char *)&rth;
1196 arg.iov[0].iov_len = sizeof rth;
1197 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1198 skb->nh.iph->saddr,
1199 sizeof(struct tcphdr), IPPROTO_TCP, 0);
1200 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1201
1202 ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
1203
1204 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1205 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1206}
1207
1208
1209
1210
1211
1212static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
1213 u32 win, u32 ts)
1214{
1215 struct tcphdr *th = skb->h.th;
1216 struct {
1217 struct tcphdr th;
1218 u32 tsopt[3];
1219 } rep;
1220 struct ip_reply_arg arg;
1221
1222 memset(&rep.th, 0, sizeof(struct tcphdr));
1223 memset(&arg, 0, sizeof arg);
1224
1225 arg.iov[0].iov_base = (unsigned char *)&rep;
1226 arg.iov[0].iov_len = sizeof(rep.th);
1227 if (ts) {
1228 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1229 (TCPOPT_TIMESTAMP << 8) |
1230 TCPOLEN_TIMESTAMP);
1231 rep.tsopt[1] = htonl(tcp_time_stamp);
1232 rep.tsopt[2] = htonl(ts);
1233 arg.iov[0].iov_len = sizeof(rep);
1234 }
1235
1236
1237 rep.th.dest = th->source;
1238 rep.th.source = th->dest;
1239 rep.th.doff = arg.iov[0].iov_len / 4;
1240 rep.th.seq = htonl(seq);
1241 rep.th.ack_seq = htonl(ack);
1242 rep.th.ack = 1;
1243 rep.th.window = htons(win);
1244
1245 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
1246 skb->nh.iph->saddr,
1247 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1248 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
1249
1250 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
1251
1252 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1253}
1254
1255static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1256{
1257 struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
1258
1259 tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
1260 tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
1261
1262 tcp_tw_put(tw);
1263}
1264
1265static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
1266{
1267 tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
1268 req->ts_recent);
1269}
1270
1271static struct dst_entry* tcp_v4_route_req(struct sock *sk,
1272 struct open_request *req)
1273{
1274 struct rtable *rt;
1275 struct ip_options *opt = req->af.v4_req.opt;
1276 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1277 .nl_u = { .ip4_u =
1278 { .daddr = ((opt && opt->srr) ?
1279 opt->faddr :
1280 req->af.v4_req.rmt_addr),
1281 .saddr = req->af.v4_req.loc_addr,
1282 .tos = RT_CONN_FLAGS(sk) } },
1283 .proto = IPPROTO_TCP,
1284 .uli_u = { .ports =
1285 { .sport = inet_sk(sk)->sport,
1286 .dport = req->rmt_port } } };
1287
1288 if (ip_route_output_flow(&rt, &fl, sk, 0)) {
1289 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1290 return NULL;
1291 }
1292 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
1293 ip_rt_put(rt);
1294 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
1295 return NULL;
1296 }
1297 return &rt->u.dst;
1298}
1299
1300
1301
1302
1303
1304
1305static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
1306 struct dst_entry *dst)
1307{
1308 int err = -1;
1309 struct sk_buff * skb;
1310
1311
1312 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1313 goto out;
1314
1315 skb = tcp_make_synack(sk, dst, req);
1316
1317 if (skb) {
1318 struct tcphdr *th = skb->h.th;
1319
1320 th->check = tcp_v4_check(th, skb->len,
1321 req->af.v4_req.loc_addr,
1322 req->af.v4_req.rmt_addr,
1323 csum_partial((char *)th, skb->len,
1324 skb->csum));
1325
1326 err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
1327 req->af.v4_req.rmt_addr,
1328 req->af.v4_req.opt);
1329 if (err == NET_XMIT_CN)
1330 err = 0;
1331 }
1332
1333out:
1334 dst_release(dst);
1335 return err;
1336}
1337
1338
1339
1340
1341static void tcp_v4_or_free(struct open_request *req)
1342{
1343 if (req->af.v4_req.opt)
1344 kfree(req->af.v4_req.opt);
1345}
1346
1347static inline void syn_flood_warning(struct sk_buff *skb)
1348{
1349 static unsigned long warntime;
1350
1351 if (time_after(jiffies, (warntime + HZ * 60))) {
1352 warntime = jiffies;
1353 printk(KERN_INFO
1354 "possible SYN flooding on port %d. Sending cookies.\n",
1355 ntohs(skb->h.th->dest));
1356 }
1357}
1358
1359
1360
1361
1362static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
1363 struct sk_buff *skb)
1364{
1365 struct ip_options *opt = &(IPCB(skb)->opt);
1366 struct ip_options *dopt = NULL;
1367
1368 if (opt && opt->optlen) {
1369 int opt_size = optlength(opt);
1370 dopt = kmalloc(opt_size, GFP_ATOMIC);
1371 if (dopt) {
1372 if (ip_options_echo(dopt, skb)) {
1373 kfree(dopt);
1374 dopt = NULL;
1375 }
1376 }
1377 }
1378 return dopt;
1379}
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
/*
 * SYN backlog limit.  tcp_v4_conn_request() starts dropping requests
 * from unproven peers once fewer than a quarter of this many slots
 * remain free in the SYN queue.
 */
int sysctl_max_syn_backlog = 256;
1395
1396struct or_calltable or_ipv4 = {
1397 .family = PF_INET,
1398 .rtx_syn_ack = tcp_v4_send_synack,
1399 .send_ack = tcp_v4_or_send_ack,
1400 .destructor = tcp_v4_or_free,
1401 .send_reset = tcp_v4_send_reset,
1402};
1403
1404int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1405{
1406 struct tcp_opt tp;
1407 struct open_request *req;
1408 __u32 saddr = skb->nh.iph->saddr;
1409 __u32 daddr = skb->nh.iph->daddr;
1410 __u32 isn = TCP_SKB_CB(skb)->when;
1411 struct dst_entry *dst = NULL;
1412#ifdef CONFIG_SYN_COOKIES
1413 int want_cookie = 0;
1414#else
1415#define want_cookie 0
1416#endif
1417
1418
1419 if (((struct rtable *)skb->dst)->rt_flags &
1420 (RTCF_BROADCAST | RTCF_MULTICAST))
1421 goto drop;
1422
1423
1424
1425
1426
1427 if (tcp_synq_is_full(sk) && !isn) {
1428#ifdef CONFIG_SYN_COOKIES
1429 if (sysctl_tcp_syncookies) {
1430 want_cookie = 1;
1431 } else
1432#endif
1433 goto drop;
1434 }
1435
1436
1437
1438
1439
1440
1441 if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
1442 goto drop;
1443
1444 req = tcp_openreq_alloc();
1445 if (!req)
1446 goto drop;
1447
1448 tcp_clear_options(&tp);
1449 tp.mss_clamp = 536;
1450 tp.user_mss = tcp_sk(sk)->user_mss;
1451
1452 tcp_parse_options(skb, &tp, 0);
1453
1454 if (want_cookie) {
1455 tcp_clear_options(&tp);
1456 tp.saw_tstamp = 0;
1457 }
1458
1459 if (tp.saw_tstamp && !tp.rcv_tsval) {
1460
1461
1462
1463
1464
1465 tp.saw_tstamp = 0;
1466 tp.tstamp_ok = 0;
1467 }
1468 tp.tstamp_ok = tp.saw_tstamp;
1469
1470 tcp_openreq_init(req, &tp, skb);
1471
1472 req->af.v4_req.loc_addr = daddr;
1473 req->af.v4_req.rmt_addr = saddr;
1474 req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
1475 req->class = &or_ipv4;
1476 if (!want_cookie)
1477 TCP_ECN_create_request(req, skb->h.th);
1478
1479 if (want_cookie) {
1480#ifdef CONFIG_SYN_COOKIES
1481 syn_flood_warning(skb);
1482#endif
1483 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1484 } else if (!isn) {
1485 struct inet_peer *peer = NULL;
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496 if (tp.saw_tstamp &&
1497 sysctl_tcp_tw_recycle &&
1498 (dst = tcp_v4_route_req(sk, req)) != NULL &&
1499 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1500 peer->v4daddr == saddr) {
1501 if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
1502 (s32)(peer->tcp_ts - req->ts_recent) >
1503 TCP_PAWS_WINDOW) {
1504 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
1505 dst_release(dst);
1506 goto drop_and_free;
1507 }
1508 }
1509
1510 else if (!sysctl_tcp_syncookies &&
1511 (sysctl_max_syn_backlog - tcp_synq_len(sk) <
1512 (sysctl_max_syn_backlog >> 2)) &&
1513 (!peer || !peer->tcp_ts_stamp) &&
1514 (!dst || !dst_metric(dst, RTAX_RTT))) {
1515
1516
1517
1518
1519
1520
1521
1522 NETDEBUG(if (net_ratelimit()) \
1523 printk(KERN_DEBUG "TCP: drop open "
1524 "request from %u.%u."
1525 "%u.%u/%u\n", \
1526 NIPQUAD(saddr),
1527 ntohs(skb->h.th->source)));
1528 dst_release(dst);
1529 goto drop_and_free;
1530 }
1531
1532 isn = tcp_v4_init_sequence(sk, skb);
1533 }
1534 req->snt_isn = isn;
1535
1536 if (tcp_v4_send_synack(sk, req, dst))
1537 goto drop_and_free;
1538
1539 if (want_cookie) {
1540 tcp_openreq_free(req);
1541 } else {
1542 tcp_v4_synq_add(sk, req);
1543 }
1544 return 0;
1545
1546drop_and_free:
1547 tcp_openreq_free(req);
1548drop:
1549 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1550 return 0;
1551}
1552
1553
1554
1555
1556
1557
1558struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1559 struct open_request *req,
1560 struct dst_entry *dst)
1561{
1562 struct inet_opt *newinet;
1563 struct tcp_opt *newtp;
1564 struct sock *newsk;
1565
1566 if (sk_acceptq_is_full(sk))
1567 goto exit_overflow;
1568
1569 if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
1570 goto exit;
1571
1572 newsk = tcp_create_openreq_child(sk, req, skb);
1573 if (!newsk)
1574 goto exit;
1575
1576 newsk->sk_dst_cache = dst;
1577 tcp_v4_setup_caps(newsk, dst);
1578
1579 newtp = tcp_sk(newsk);
1580 newinet = inet_sk(newsk);
1581 newinet->daddr = req->af.v4_req.rmt_addr;
1582 newinet->rcv_saddr = req->af.v4_req.loc_addr;
1583 newinet->saddr = req->af.v4_req.loc_addr;
1584 newinet->opt = req->af.v4_req.opt;
1585 req->af.v4_req.opt = NULL;
1586 newinet->mc_index = tcp_v4_iif(skb);
1587 newinet->mc_ttl = skb->nh.iph->ttl;
1588 newtp->ext_header_len = 0;
1589 if (newinet->opt)
1590 newtp->ext_header_len = newinet->opt->optlen;
1591 newtp->ext2_header_len = dst->header_len;
1592 newinet->id = newtp->write_seq ^ jiffies;
1593
1594 tcp_sync_mss(newsk, dst_pmtu(dst));
1595 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1596 tcp_initialize_rcv_mss(newsk);
1597
1598 __tcp_v4_hash(newsk, 0);
1599 __tcp_inherit_port(sk, newsk);
1600
1601 return newsk;
1602
1603exit_overflow:
1604 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1605exit:
1606 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1607 dst_release(dst);
1608 return NULL;
1609}
1610
1611static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1612{
1613 struct tcphdr *th = skb->h.th;
1614 struct iphdr *iph = skb->nh.iph;
1615 struct tcp_opt *tp = tcp_sk(sk);
1616 struct sock *nsk;
1617 struct open_request **prev;
1618
1619 struct open_request *req = tcp_v4_search_req(tp, &prev, th->source,
1620 iph->saddr, iph->daddr);
1621 if (req)
1622 return tcp_check_req(sk, skb, req, prev);
1623
1624 nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
1625 th->source,
1626 skb->nh.iph->daddr,
1627 ntohs(th->dest),
1628 tcp_v4_iif(skb));
1629
1630 if (nsk) {
1631 if (nsk->sk_state != TCP_TIME_WAIT) {
1632 bh_lock_sock(nsk);
1633 return nsk;
1634 }
1635 tcp_tw_put((struct tcp_tw_bucket *)nsk);
1636 return NULL;
1637 }
1638
1639#ifdef CONFIG_SYN_COOKIES
1640 if (!th->rst && !th->syn && th->ack)
1641 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1642#endif
1643 return sk;
1644}
1645
1646static int tcp_v4_checksum_init(struct sk_buff *skb)
1647{
1648 if (skb->ip_summed == CHECKSUM_HW) {
1649 skb->ip_summed = CHECKSUM_UNNECESSARY;
1650 if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1651 skb->nh.iph->daddr, skb->csum))
1652 return 0;
1653
1654 NETDEBUG(if (net_ratelimit())
1655 printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
1656 skb->ip_summed = CHECKSUM_NONE;
1657 }
1658 if (skb->len <= 76) {
1659 if (tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
1660 skb->nh.iph->daddr,
1661 skb_checksum(skb, 0, skb->len, 0)))
1662 return -1;
1663 skb->ip_summed = CHECKSUM_UNNECESSARY;
1664 } else {
1665 skb->csum = ~tcp_v4_check(skb->h.th, skb->len,
1666 skb->nh.iph->saddr,
1667 skb->nh.iph->daddr, 0);
1668 }
1669 return 0;
1670}
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1682{
1683 if (sk->sk_state == TCP_ESTABLISHED) {
1684 TCP_CHECK_TIMER(sk);
1685 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1686 goto reset;
1687 TCP_CHECK_TIMER(sk);
1688 return 0;
1689 }
1690
1691 if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
1692 goto csum_err;
1693
1694 if (sk->sk_state == TCP_LISTEN) {
1695 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1696 if (!nsk)
1697 goto discard;
1698
1699 if (nsk != sk) {
1700 if (tcp_child_process(sk, nsk, skb))
1701 goto reset;
1702 return 0;
1703 }
1704 }
1705
1706 TCP_CHECK_TIMER(sk);
1707 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1708 goto reset;
1709 TCP_CHECK_TIMER(sk);
1710 return 0;
1711
1712reset:
1713 tcp_v4_send_reset(skb);
1714discard:
1715 kfree_skb(skb);
1716
1717
1718
1719
1720
1721 return 0;
1722
1723csum_err:
1724 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1725 goto discard;
1726}
1727
1728
1729
1730
1731
/*
 * Receive entry point for one IPv4 TCP segment held in skb.
 *
 * Validates the header, fills in the TCP control block, looks up the
 * owning socket and either processes the segment, queues it, or drops it.
 * Returns the result of tcp_v4_do_rcv() when the segment is processed
 * directly, 0 in every other case.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	/* Only packets addressed to this host are handled. */
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Counted before any validation, so bad segments are included. */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	/* Data offset (in 32-bit words) smaller than the fixed header. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/*
	 * Skip checksum setup when the skb is already marked
	 * CHECKSUM_UNNECESSARY; a negative result from
	 * tcp_v4_checksum_init() means the segment is bad.
	 */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb) < 0))
		goto bad_packet;

	/*
	 * th is re-read from the skb here.
	 * NOTE(review): presumably because the second pskb_may_pull() may
	 * have moved the header - confirm.
	 */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq = seq + SYN flag + FIN flag + payload length. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	/* The control block's flags field carries the IP TOS byte here. */
	TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
			     skb->nh.iph->daddr, ntohs(th->dest),
			     tcp_v4_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	/*
	 * Socket not owned by a user context: try the prequeue first and
	 * process directly if tcp_prequeue() did not take the skb.
	 * Otherwise the skb goes onto the socket backlog.
	 */
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	/*
	 * Malformed or badly checksummed segments are only counted; a valid
	 * segment for which no socket exists is answered with a reset.
	 * The bad_packet label sits inside the if-body so the early header
	 * checks above share the TCP_MIB_INERRS increment.
	 */
	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(skb);
	}

discard_it:
	/* Drop the segment. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	/* sk is really a struct tcp_tw_bucket on this path. */
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}
	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					   skb, th, skb->len)) {
	case TCP_TW_SYN: {
		/*
		 * If a listener exists for the destination, retire the
		 * time-wait bucket and process the segment on the listener.
		 */
		struct sock *sk2 = tcp_v4_lookup_listener(skb->nh.iph->daddr,
							  ntohs(th->dest),
							  tcp_v4_iif(skb));
		if (sk2) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* No listener: fall through and treat as TCP_TW_ACK. */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1857
1858
1859
1860
1861static void __tcp_v4_rehash(struct sock *sk)
1862{
1863 sk->sk_prot->unhash(sk);
1864 sk->sk_prot->hash(sk);
1865}
1866
1867static int tcp_v4_reselect_saddr(struct sock *sk)
1868{
1869 struct inet_opt *inet = inet_sk(sk);
1870 int err;
1871 struct rtable *rt;
1872 __u32 old_saddr = inet->saddr;
1873 __u32 new_saddr;
1874 __u32 daddr = inet->daddr;
1875
1876 if (inet->opt && inet->opt->srr)
1877 daddr = inet->opt->faddr;
1878
1879
1880 err = ip_route_connect(&rt, daddr, 0,
1881 RT_TOS(inet->tos) | sk->sk_localroute,
1882 sk->sk_bound_dev_if,
1883 IPPROTO_TCP,
1884 inet->sport, inet->dport, sk);
1885 if (err)
1886 return err;
1887
1888 __sk_dst_set(sk, &rt->u.dst);
1889 tcp_v4_setup_caps(sk, &rt->u.dst);
1890 tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1891
1892 new_saddr = rt->rt_src;
1893
1894 if (new_saddr == old_saddr)
1895 return 0;
1896
1897 if (sysctl_ip_dynaddr > 1) {
1898 printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->"
1899 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
1900 NIPQUAD(old_saddr),
1901 NIPQUAD(new_saddr));
1902 }
1903
1904 inet->saddr = new_saddr;
1905 inet->rcv_saddr = new_saddr;
1906
1907
1908
1909
1910
1911
1912
1913
1914 __tcp_v4_rehash(sk);
1915 return 0;
1916}
1917
1918int tcp_v4_rebuild_header(struct sock *sk)
1919{
1920 struct inet_opt *inet = inet_sk(sk);
1921 struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
1922 u32 daddr;
1923 int err;
1924
1925
1926 if (rt)
1927 return 0;
1928
1929
1930 daddr = inet->daddr;
1931 if (inet->opt && inet->opt->srr)
1932 daddr = inet->opt->faddr;
1933
1934 {
1935 struct flowi fl = { .oif = sk->sk_bound_dev_if,
1936 .nl_u = { .ip4_u =
1937 { .daddr = daddr,
1938 .saddr = inet->saddr,
1939 .tos = RT_CONN_FLAGS(sk) } },
1940 .proto = IPPROTO_TCP,
1941 .uli_u = { .ports =
1942 { .sport = inet->sport,
1943 .dport = inet->dport } } };
1944
1945 err = ip_route_output_flow(&rt, &fl, sk, 0);
1946 }
1947 if (!err) {
1948 __sk_dst_set(sk, &rt->u.dst);
1949 tcp_v4_setup_caps(sk, &rt->u.dst);
1950 tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
1951 return 0;
1952 }
1953
1954
1955 sk->sk_route_caps = 0;
1956
1957 if (!sysctl_ip_dynaddr ||
1958 sk->sk_state != TCP_SYN_SENT ||
1959 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
1960 (err = tcp_v4_reselect_saddr(sk)) != 0)
1961 sk->sk_err_soft = -err;
1962
1963 return err;
1964}
1965
1966static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1967{
1968 struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
1969 struct inet_opt *inet = inet_sk(sk);
1970
1971 sin->sin_family = AF_INET;
1972 sin->sin_addr.s_addr = inet->daddr;
1973 sin->sin_port = inet->dport;
1974}
1975
1976
1977
1978
1979
1980
1981
1982int tcp_v4_remember_stamp(struct sock *sk)
1983{
1984 struct inet_opt *inet = inet_sk(sk);
1985 struct tcp_opt *tp = tcp_sk(sk);
1986 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1987 struct inet_peer *peer = NULL;
1988 int release_it = 0;
1989
1990 if (!rt || rt->rt_dst != inet->daddr) {
1991 peer = inet_getpeer(inet->daddr, 1);
1992 release_it = 1;
1993 } else {
1994 if (!rt->peer)
1995 rt_bind_peer(rt, 1);
1996 peer = rt->peer;
1997 }
1998
1999 if (peer) {
2000 if ((s32)(peer->tcp_ts - tp->ts_recent) <= 0 ||
2001 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2002 peer->tcp_ts_stamp <= tp->ts_recent_stamp)) {
2003 peer->tcp_ts_stamp = tp->ts_recent_stamp;
2004 peer->tcp_ts = tp->ts_recent;
2005 }
2006 if (release_it)
2007 inet_putpeer(peer);
2008 return 1;
2009 }
2010
2011 return 0;
2012}
2013
2014int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw)
2015{
2016 struct inet_peer *peer = NULL;
2017
2018 peer = inet_getpeer(tw->tw_daddr, 1);
2019
2020 if (peer) {
2021 if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 ||
2022 (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
2023 peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) {
2024 peer->tcp_ts_stamp = tw->tw_ts_recent_stamp;
2025 peer->tcp_ts = tw->tw_ts_recent;
2026 }
2027 inet_putpeer(peer);
2028 return 1;
2029 }
2030
2031 return 0;
2032}
2033
2034struct tcp_func ipv4_specific = {
2035 .queue_xmit = ip_queue_xmit,
2036 .send_check = tcp_v4_send_check,
2037 .rebuild_header = tcp_v4_rebuild_header,
2038 .conn_request = tcp_v4_conn_request,
2039 .syn_recv_sock = tcp_v4_syn_recv_sock,
2040 .remember_stamp = tcp_v4_remember_stamp,
2041 .net_header_len = sizeof(struct iphdr),
2042 .setsockopt = ip_setsockopt,
2043 .getsockopt = ip_getsockopt,
2044 .addr2sockaddr = v4_addr2sockaddr,
2045 .sockaddr_len = sizeof(struct sockaddr_in),
2046};
2047
2048
2049
2050
2051static int tcp_v4_init_sock(struct sock *sk)
2052{
2053 struct tcp_opt *tp = tcp_sk(sk);
2054
2055 skb_queue_head_init(&tp->out_of_order_queue);
2056 tcp_init_xmit_timers(sk);
2057 tcp_prequeue_init(tp);
2058
2059 tp->rto = TCP_TIMEOUT_INIT;
2060 tp->mdev = TCP_TIMEOUT_INIT;
2061
2062
2063
2064
2065
2066
2067 tp->snd_cwnd = 2;
2068
2069
2070
2071
2072 tp->snd_ssthresh = 0x7fffffff;
2073 tp->snd_cwnd_clamp = ~0;
2074 tp->mss_cache_std = tp->mss_cache = 536;
2075
2076 tp->reordering = sysctl_tcp_reordering;
2077
2078 sk->sk_state = TCP_CLOSE;
2079
2080 sk->sk_write_space = sk_stream_write_space;
2081 sk->sk_use_write_queue = 1;
2082
2083 tp->af_specific = &ipv4_specific;
2084
2085 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2086 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2087
2088 atomic_inc(&tcp_sockets_allocated);
2089
2090 return 0;
2091}
2092
2093int tcp_v4_destroy_sock(struct sock *sk)
2094{
2095 struct tcp_opt *tp = tcp_sk(sk);
2096
2097 tcp_clear_xmit_timers(sk);
2098
2099
2100 sk_stream_writequeue_purge(sk);
2101
2102
2103 __skb_queue_purge(&tp->out_of_order_queue);
2104
2105
2106 __skb_queue_purge(&tp->ucopy.prequeue);
2107
2108
2109 if (tp->bind_hash)
2110 tcp_put_port(sk);
2111
2112
2113
2114
2115 if (sk->sk_sndmsg_page) {
2116 __free_page(sk->sk_sndmsg_page);
2117 sk->sk_sndmsg_page = NULL;
2118 }
2119
2120 atomic_dec(&tcp_sockets_allocated);
2121
2122 return 0;
2123}
2124
2125EXPORT_SYMBOL(tcp_v4_destroy_sock);
2126
2127#ifdef CONFIG_PROC_FS
2128
2129
2130static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head)
2131{
2132 return hlist_empty(head) ? NULL :
2133 list_entry(head->first, struct tcp_tw_bucket, tw_node);
2134}
2135
2136static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw)
2137{
2138 return tw->tw_node.next ?
2139 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2140}
2141
/*
 * Advance the /proc iterator over listening sockets and their pending
 * open requests.
 *
 * cur is the previously returned entry (a struct sock, or a struct
 * open_request when st->state is TCP_SEQ_STATE_OPENREQ), or NULL to start
 * from the first bucket of tcp_listening_hash.  Returns the next entry
 * whose family matches st->family, or NULL when the table is exhausted.
 *
 * When an open request is returned, the syn_wait_lock of st->syn_wait_sk
 * is still read-locked; it is dropped here once that socket's requests
 * have all been visited.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct tcp_opt *tp;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state* st = seq->private;

	/* No current entry: begin with the first listening hash bucket. */
	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is an open request belonging to st->syn_wait_sk. */
		struct open_request *req = cur;

		tp = tcp_sk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			/* Rest of the current SYN table chain. */
			while (req) {
				if (req->class->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			/* Chain finished: move to the next SYN table slot. */
			if (++st->sbucket >= TCP_SYNQ_HSIZE)
				break;
get_req:
			req = tp->listen_opt->syn_table[st->sbucket];
		}
		/* All requests visited: continue with the next listener. */
		sk	  = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&tp->syn_wait_lock);
	} else {
		/*
		 * cur is a listening socket that has already been returned;
		 * visit its pending requests (if any) before moving on.
		 */
		tp = tcp_sk(sk);
		read_lock_bh(&tp->syn_wait_lock);
		if (tp->listen_opt && tp->listen_opt->qlen)
			goto start_req;
		read_unlock_bh(&tp->syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		/*
		 * The socket's family does not match, but its request queue
		 * is still scanned for requests of the wanted family.
		 */
		tp = tcp_sk(sk);
		read_lock_bh(&tp->syn_wait_lock);
		if (tp->listen_opt && tp->listen_opt->qlen) {
start_req:
			/* Entered with tp->syn_wait_lock read-locked. */
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&tp->syn_wait_lock);
	}
	/* Chain finished: try the next listening hash bucket. */
	if (++st->bucket < TCP_LHTABLE_SIZE) {
		sk = sk_head(&tcp_listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
2212
2213static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2214{
2215 void *rc = listening_get_next(seq, NULL);
2216
2217 while (rc && *pos) {
2218 rc = listening_get_next(seq, rc);
2219 --*pos;
2220 }
2221 return rc;
2222}
2223
2224static void *established_get_first(struct seq_file *seq)
2225{
2226 struct tcp_iter_state* st = seq->private;
2227 void *rc = NULL;
2228
2229 for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) {
2230 struct sock *sk;
2231 struct hlist_node *node;
2232 struct tcp_tw_bucket *tw;
2233
2234 read_lock(&tcp_ehash[st->bucket].lock);
2235 sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) {
2236 if (sk->sk_family != st->family) {
2237 continue;
2238 }
2239 rc = sk;
2240 goto out;
2241 }
2242 st->state = TCP_SEQ_STATE_TIME_WAIT;
2243 tw_for_each(tw, node,
2244 &tcp_ehash[st->bucket + tcp_ehash_size].chain) {
2245 if (tw->tw_family != st->family) {
2246 continue;
2247 }
2248 rc = tw;
2249 goto out;
2250 }
2251 read_unlock(&tcp_ehash[st->bucket].lock);
2252 st->state = TCP_SEQ_STATE_ESTABLISHED;
2253 }
2254out:
2255 return rc;
2256}
2257
/*
 * Advance the /proc iterator over the established hash table.
 *
 * cur is the previously returned entry: a struct tcp_tw_bucket when
 * st->state is TCP_SEQ_STATE_TIME_WAIT, a struct sock otherwise.  Returns
 * the next entry whose family matches st->family, or NULL once every
 * bucket has been visited.  The lock of bucket st->bucket stays
 * read-locked while an entry from that bucket is returned.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct tcp_tw_bucket *tw;
	struct hlist_node *node;
	struct tcp_iter_state* st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Skip time-wait buckets of other families. */
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/*
		 * Time-wait chain exhausted: release this bucket and move on
		 * to the regular chain of the next one, if any.
		 */
		read_unlock(&tcp_ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (++st->bucket < tcp_ehash_size) {
			read_lock(&tcp_ehash[st->bucket].lock);
			sk = sk_head(&tcp_ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	/* Regular chain exhausted: switch to this bucket's time-wait chain. */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
2303
2304static void *established_get_idx(struct seq_file *seq, loff_t pos)
2305{
2306 void *rc = established_get_first(seq);
2307
2308 while (rc && pos) {
2309 rc = established_get_next(seq, rc);
2310 --pos;
2311 }
2312 return rc;
2313}
2314
2315static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2316{
2317 void *rc;
2318 struct tcp_iter_state* st = seq->private;
2319
2320 tcp_listen_lock();
2321 st->state = TCP_SEQ_STATE_LISTENING;
2322 rc = listening_get_idx(seq, &pos);
2323
2324 if (!rc) {
2325 tcp_listen_unlock();
2326 local_bh_disable();
2327 st->state = TCP_SEQ_STATE_ESTABLISHED;
2328 rc = established_get_idx(seq, pos);
2329 }
2330
2331 return rc;
2332}
2333
2334static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2335{
2336 struct tcp_iter_state* st = seq->private;
2337 st->state = TCP_SEQ_STATE_LISTENING;
2338 st->num = 0;
2339 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2340}
2341
2342static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2343{
2344 void *rc = NULL;
2345 struct tcp_iter_state* st;
2346
2347 if (v == SEQ_START_TOKEN) {
2348 rc = tcp_get_idx(seq, 0);
2349 goto out;
2350 }
2351 st = seq->private;
2352
2353 switch (st->state) {
2354 case TCP_SEQ_STATE_OPENREQ:
2355 case TCP_SEQ_STATE_LISTENING:
2356 rc = listening_get_next(seq, v);
2357 if (!rc) {
2358 tcp_listen_unlock();
2359 local_bh_disable();
2360 st->state = TCP_SEQ_STATE_ESTABLISHED;
2361 rc = established_get_first(seq);
2362 }
2363 break;
2364 case TCP_SEQ_STATE_ESTABLISHED:
2365 case TCP_SEQ_STATE_TIME_WAIT:
2366 rc = established_get_next(seq, v);
2367 break;
2368 }
2369out:
2370 ++*pos;
2371 return rc;
2372}
2373
2374static void tcp_seq_stop(struct seq_file *seq, void *v)
2375{
2376 struct tcp_iter_state* st = seq->private;
2377
2378 switch (st->state) {
2379 case TCP_SEQ_STATE_OPENREQ:
2380 if (v) {
2381 struct tcp_opt *tp = tcp_sk(st->syn_wait_sk);
2382 read_unlock_bh(&tp->syn_wait_lock);
2383 }
2384 case TCP_SEQ_STATE_LISTENING:
2385 if (v != SEQ_START_TOKEN)
2386 tcp_listen_unlock();
2387 break;
2388 case TCP_SEQ_STATE_TIME_WAIT:
2389 case TCP_SEQ_STATE_ESTABLISHED:
2390 if (v)
2391 read_unlock(&tcp_ehash[st->bucket].lock);
2392 local_bh_enable();
2393 break;
2394 }
2395}
2396
2397static int tcp_seq_open(struct inode *inode, struct file *file)
2398{
2399 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2400 struct seq_file *seq;
2401 struct tcp_iter_state *s;
2402 int rc;
2403
2404 if (unlikely(afinfo == NULL))
2405 return -EINVAL;
2406
2407 s = kmalloc(sizeof(*s), GFP_KERNEL);
2408 if (!s)
2409 return -ENOMEM;
2410 memset(s, 0, sizeof(*s));
2411 s->family = afinfo->family;
2412 s->seq_ops.start = tcp_seq_start;
2413 s->seq_ops.next = tcp_seq_next;
2414 s->seq_ops.show = afinfo->seq_show;
2415 s->seq_ops.stop = tcp_seq_stop;
2416
2417 rc = seq_open(file, &s->seq_ops);
2418 if (rc)
2419 goto out_kfree;
2420 seq = file->private_data;
2421 seq->private = s;
2422out:
2423 return rc;
2424out_kfree:
2425 kfree(s);
2426 goto out;
2427}
2428
2429int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
2430{
2431 int rc = 0;
2432 struct proc_dir_entry *p;
2433
2434 if (!afinfo)
2435 return -EINVAL;
2436 afinfo->seq_fops->owner = afinfo->owner;
2437 afinfo->seq_fops->open = tcp_seq_open;
2438 afinfo->seq_fops->read = seq_read;
2439 afinfo->seq_fops->llseek = seq_lseek;
2440 afinfo->seq_fops->release = seq_release_private;
2441
2442 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
2443 if (p)
2444 p->data = afinfo;
2445 else
2446 rc = -ENOMEM;
2447 return rc;
2448}
2449
2450void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
2451{
2452 if (!afinfo)
2453 return;
2454 proc_net_remove(afinfo->name);
2455 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
2456}
2457
2458static void get_openreq4(struct sock *sk, struct open_request *req,
2459 char *tmpbuf, int i, int uid)
2460{
2461 int ttd = req->expires - jiffies;
2462
2463 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2464 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2465 i,
2466 req->af.v4_req.loc_addr,
2467 ntohs(inet_sk(sk)->sport),
2468 req->af.v4_req.rmt_addr,
2469 ntohs(req->rmt_port),
2470 TCP_SYN_RECV,
2471 0, 0,
2472 1,
2473 jiffies_to_clock_t(ttd),
2474 req->retrans,
2475 uid,
2476 0,
2477 0,
2478 atomic_read(&sk->sk_refcnt),
2479 req);
2480}
2481
2482static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
2483{
2484 int timer_active;
2485 unsigned long timer_expires;
2486 struct tcp_opt *tp = tcp_sk(sp);
2487 struct inet_opt *inet = inet_sk(sp);
2488 unsigned int dest = inet->daddr;
2489 unsigned int src = inet->rcv_saddr;
2490 __u16 destp = ntohs(inet->dport);
2491 __u16 srcp = ntohs(inet->sport);
2492
2493 if (tp->pending == TCP_TIME_RETRANS) {
2494 timer_active = 1;
2495 timer_expires = tp->timeout;
2496 } else if (tp->pending == TCP_TIME_PROBE0) {
2497 timer_active = 4;
2498 timer_expires = tp->timeout;
2499 } else if (timer_pending(&sp->sk_timer)) {
2500 timer_active = 2;
2501 timer_expires = sp->sk_timer.expires;
2502 } else {
2503 timer_active = 0;
2504 timer_expires = jiffies;
2505 }
2506
2507 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2508 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2509 i, src, srcp, dest, destp, sp->sk_state,
2510 tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
2511 timer_active,
2512 jiffies_to_clock_t(timer_expires - jiffies),
2513 tp->retransmits,
2514 sock_i_uid(sp),
2515 tp->probes_out,
2516 sock_i_ino(sp),
2517 atomic_read(&sp->sk_refcnt), sp,
2518 tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
2519 tp->snd_cwnd,
2520 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
2521}
2522
2523static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
2524{
2525 unsigned int dest, src;
2526 __u16 destp, srcp;
2527 int ttd = tw->tw_ttd - jiffies;
2528
2529 if (ttd < 0)
2530 ttd = 0;
2531
2532 dest = tw->tw_daddr;
2533 src = tw->tw_rcv_saddr;
2534 destp = ntohs(tw->tw_dport);
2535 srcp = ntohs(tw->tw_sport);
2536
2537 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
2538 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2539 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2540 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2541 atomic_read(&tw->tw_refcnt), tw);
2542}
2543
2544#define TMPSZ 150
2545
2546static int tcp4_seq_show(struct seq_file *seq, void *v)
2547{
2548 struct tcp_iter_state* st;
2549 char tmpbuf[TMPSZ + 1];
2550
2551 if (v == SEQ_START_TOKEN) {
2552 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2553 " sl local_address rem_address st tx_queue "
2554 "rx_queue tr tm->when retrnsmt uid timeout "
2555 "inode");
2556 goto out;
2557 }
2558 st = seq->private;
2559
2560 switch (st->state) {
2561 case TCP_SEQ_STATE_LISTENING:
2562 case TCP_SEQ_STATE_ESTABLISHED:
2563 get_tcp4_sock(v, tmpbuf, st->num);
2564 break;
2565 case TCP_SEQ_STATE_OPENREQ:
2566 get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
2567 break;
2568 case TCP_SEQ_STATE_TIME_WAIT:
2569 get_timewait4_sock(v, tmpbuf, st->num);
2570 break;
2571 }
2572 seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
2573out:
2574 return 0;
2575}
2576
2577static struct file_operations tcp4_seq_fops;
2578static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2579 .owner = THIS_MODULE,
2580 .name = "tcp",
2581 .family = AF_INET,
2582 .seq_show = tcp4_seq_show,
2583 .seq_fops = &tcp4_seq_fops,
2584};
2585
2586int __init tcp4_proc_init(void)
2587{
2588 return tcp_proc_register(&tcp4_seq_afinfo);
2589}
2590
2591void tcp4_proc_exit(void)
2592{
2593 tcp_proc_unregister(&tcp4_seq_afinfo);
2594}
2595#endif
2596
2597struct proto tcp_prot = {
2598 .name = "TCP",
2599 .close = tcp_close,
2600 .connect = tcp_v4_connect,
2601 .disconnect = tcp_disconnect,
2602 .accept = tcp_accept,
2603 .ioctl = tcp_ioctl,
2604 .init = tcp_v4_init_sock,
2605 .destroy = tcp_v4_destroy_sock,
2606 .shutdown = tcp_shutdown,
2607 .setsockopt = tcp_setsockopt,
2608 .getsockopt = tcp_getsockopt,
2609 .sendmsg = tcp_sendmsg,
2610 .recvmsg = tcp_recvmsg,
2611 .backlog_rcv = tcp_v4_do_rcv,
2612 .hash = tcp_v4_hash,
2613 .unhash = tcp_unhash,
2614 .get_port = tcp_v4_get_port,
2615 .enter_memory_pressure = tcp_enter_memory_pressure,
2616 .sockets_allocated = &tcp_sockets_allocated,
2617 .memory_allocated = &tcp_memory_allocated,
2618 .memory_pressure = &tcp_memory_pressure,
2619 .sysctl_mem = sysctl_tcp_mem,
2620 .sysctl_wmem = sysctl_tcp_wmem,
2621 .sysctl_rmem = sysctl_tcp_rmem,
2622 .max_header = MAX_TCP_HEADER,
2623 .slab_obj_size = sizeof(struct tcp_sock),
2624};
2625
2626
2627
2628void __init tcp_v4_init(struct net_proto_family *ops)
2629{
2630 int err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
2631 if (err < 0)
2632 panic("Failed to create the TCP control socket.\n");
2633 tcp_socket->sk->sk_allocation = GFP_ATOMIC;
2634 inet_sk(tcp_socket->sk)->uc_ttl = -1;
2635
2636
2637
2638
2639
2640 tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
2641}
2642
/* Symbols exported from this file unconditionally. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_bind_hash);
EXPORT_SYMBOL(tcp_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_inherit_port);
EXPORT_SYMBOL(tcp_listen_wlock);
EXPORT_SYMBOL(tcp_port_rover);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_put_port);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_lookup_listener);
EXPORT_SYMBOL(tcp_v4_rebuild_header);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

/* The proc registration helpers exist only with CONFIG_PROC_FS. */
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
/* Tunables exported only when CONFIG_SYSCTL is enabled. */
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_max_syn_backlog);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
#endif
2671