1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54#include <linux/bottom_half.h>
55#include <linux/types.h>
56#include <linux/fcntl.h>
57#include <linux/module.h>
58#include <linux/random.h>
59#include <linux/cache.h>
60#include <linux/jhash.h>
61#include <linux/init.h>
62#include <linux/times.h>
63#include <linux/slab.h>
64
65#include <net/net_namespace.h>
66#include <net/icmp.h>
67#include <net/inet_hashtables.h>
68#include <net/tcp.h>
69#include <net/transp_v6.h>
70#include <net/ipv6.h>
71#include <net/inet_common.h>
72#include <net/timewait_sock.h>
73#include <net/xfrm.h>
74#include <net/netdma.h>
75#include <net/secure_seq.h>
76
77#include <linux/inet.h>
78#include <linux/ipv6.h>
79#include <linux/stddef.h>
80#include <linux/proc_fs.h>
81#include <linux/seq_file.h>
82
83#include <linux/crypto.h>
84#include <linux/scatterlist.h>
85
86int sysctl_tcp_tw_reuse __read_mostly;
87int sysctl_tcp_low_latency __read_mostly;
88EXPORT_SYMBOL(sysctl_tcp_low_latency);
89
90
91#ifdef CONFIG_TCP_MD5SIG
92static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
93 __be32 addr);
94static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
96#else
97static inline
98struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
99{
100 return NULL;
101}
102#endif
103
104struct inet_hashinfo tcp_hashinfo;
105EXPORT_SYMBOL(tcp_hashinfo);
106
107static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
108{
109 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
110 ip_hdr(skb)->saddr,
111 tcp_hdr(skb)->dest,
112 tcp_hdr(skb)->source);
113}
114
115int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
116{
117 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
118 struct tcp_sock *tp = tcp_sk(sk);
119
120
121
122
123
124
125
126
127
128
129
130
131 if (tcptw->tw_ts_recent_stamp &&
132 (twp == NULL || (sysctl_tcp_tw_reuse &&
133 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
134 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
135 if (tp->write_seq == 0)
136 tp->write_seq = 1;
137 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
138 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
139 sock_hold(sktw);
140 return 1;
141 }
142
143 return 0;
144}
145EXPORT_SYMBOL_GPL(tcp_twsk_unique);
146
147
148int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
149{
150 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
151 struct inet_sock *inet = inet_sk(sk);
152 struct tcp_sock *tp = tcp_sk(sk);
153 __be16 orig_sport, orig_dport;
154 __be32 daddr, nexthop;
155 struct flowi4 *fl4;
156 struct rtable *rt;
157 int err;
158 struct ip_options_rcu *inet_opt;
159
160 if (addr_len < sizeof(struct sockaddr_in))
161 return -EINVAL;
162
163 if (usin->sin_family != AF_INET)
164 return -EAFNOSUPPORT;
165
166 nexthop = daddr = usin->sin_addr.s_addr;
167 inet_opt = rcu_dereference_protected(inet->inet_opt,
168 sock_owned_by_user(sk));
169 if (inet_opt && inet_opt->opt.srr) {
170 if (!daddr)
171 return -EINVAL;
172 nexthop = inet_opt->opt.faddr;
173 }
174
175 orig_sport = inet->inet_sport;
176 orig_dport = usin->sin_port;
177 fl4 = &inet->cork.fl.u.ip4;
178 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
179 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
180 IPPROTO_TCP,
181 orig_sport, orig_dport, sk, true);
182 if (IS_ERR(rt)) {
183 err = PTR_ERR(rt);
184 if (err == -ENETUNREACH)
185 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
186 return err;
187 }
188
189 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
190 ip_rt_put(rt);
191 return -ENETUNREACH;
192 }
193
194 if (!inet_opt || !inet_opt->opt.srr)
195 daddr = fl4->daddr;
196
197 if (!inet->inet_saddr)
198 inet->inet_saddr = fl4->saddr;
199 inet->inet_rcv_saddr = inet->inet_saddr;
200
201 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
202
203 tp->rx_opt.ts_recent = 0;
204 tp->rx_opt.ts_recent_stamp = 0;
205 tp->write_seq = 0;
206 }
207
208 if (tcp_death_row.sysctl_tw_recycle &&
209 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
210 struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
211
212
213
214
215
216
217 if (peer) {
218 inet_peer_refcheck(peer);
219 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
220 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
221 tp->rx_opt.ts_recent = peer->tcp_ts;
222 }
223 }
224 }
225
226 inet->inet_dport = usin->sin_port;
227 inet->inet_daddr = daddr;
228
229 inet_csk(sk)->icsk_ext_hdr_len = 0;
230 if (inet_opt)
231 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
232
233 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
234
235
236
237
238
239
240 tcp_set_state(sk, TCP_SYN_SENT);
241 err = inet_hash_connect(&tcp_death_row, sk);
242 if (err)
243 goto failure;
244
245 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
246 inet->inet_sport, inet->inet_dport, sk);
247 if (IS_ERR(rt)) {
248 err = PTR_ERR(rt);
249 rt = NULL;
250 goto failure;
251 }
252
253 sk->sk_gso_type = SKB_GSO_TCPV4;
254 sk_setup_caps(sk, &rt->dst);
255
256 if (!tp->write_seq)
257 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
258 inet->inet_daddr,
259 inet->inet_sport,
260 usin->sin_port);
261
262 inet->inet_id = tp->write_seq ^ jiffies;
263
264 err = tcp_connect(sk);
265 rt = NULL;
266 if (err)
267 goto failure;
268
269 return 0;
270
271failure:
272
273
274
275
276 tcp_set_state(sk, TCP_CLOSE);
277 ip_rt_put(rt);
278 sk->sk_route_caps = 0;
279 inet->inet_dport = 0;
280 return err;
281}
282EXPORT_SYMBOL(tcp_v4_connect);
283
284
285
286
287static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
288{
289 struct dst_entry *dst;
290 struct inet_sock *inet = inet_sk(sk);
291
292
293
294
295
296 if (sk->sk_state == TCP_LISTEN)
297 return;
298
299
300
301
302
303
304
305 if ((dst = __sk_dst_check(sk, 0)) == NULL)
306 return;
307
308 dst->ops->update_pmtu(dst, mtu);
309
310
311
312
313 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
314 sk->sk_err_soft = EMSGSIZE;
315
316 mtu = dst_mtu(dst);
317
318 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
319 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
320 tcp_sync_mss(sk, mtu);
321
322
323
324
325
326
327 tcp_simple_retransmit(sk);
328 }
329}
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
348{
349 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
350 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
351 struct inet_connection_sock *icsk;
352 struct tcp_sock *tp;
353 struct inet_sock *inet;
354 const int type = icmp_hdr(icmp_skb)->type;
355 const int code = icmp_hdr(icmp_skb)->code;
356 struct sock *sk;
357 struct sk_buff *skb;
358 __u32 seq;
359 __u32 remaining;
360 int err;
361 struct net *net = dev_net(icmp_skb->dev);
362
363 if (icmp_skb->len < (iph->ihl << 2) + 8) {
364 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
365 return;
366 }
367
368 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
369 iph->saddr, th->source, inet_iif(icmp_skb));
370 if (!sk) {
371 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
372 return;
373 }
374 if (sk->sk_state == TCP_TIME_WAIT) {
375 inet_twsk_put(inet_twsk(sk));
376 return;
377 }
378
379 bh_lock_sock(sk);
380
381
382
383 if (sock_owned_by_user(sk))
384 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
385
386 if (sk->sk_state == TCP_CLOSE)
387 goto out;
388
389 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
390 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
391 goto out;
392 }
393
394 icsk = inet_csk(sk);
395 tp = tcp_sk(sk);
396 seq = ntohl(th->seq);
397 if (sk->sk_state != TCP_LISTEN &&
398 !between(seq, tp->snd_una, tp->snd_nxt)) {
399 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
400 goto out;
401 }
402
403 switch (type) {
404 case ICMP_SOURCE_QUENCH:
405
406 goto out;
407 case ICMP_PARAMETERPROB:
408 err = EPROTO;
409 break;
410 case ICMP_DEST_UNREACH:
411 if (code > NR_ICMP_UNREACH)
412 goto out;
413
414 if (code == ICMP_FRAG_NEEDED) {
415 if (!sock_owned_by_user(sk))
416 do_pmtu_discovery(sk, iph, info);
417 goto out;
418 }
419
420 err = icmp_err_convert[code].errno;
421
422
423 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
424 break;
425 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
426 !icsk->icsk_backoff)
427 break;
428
429 if (sock_owned_by_user(sk))
430 break;
431
432 icsk->icsk_backoff--;
433 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
434 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
435 tcp_bound_rto(sk);
436
437 skb = tcp_write_queue_head(sk);
438 BUG_ON(!skb);
439
440 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
441 tcp_time_stamp - TCP_SKB_CB(skb)->when);
442
443 if (remaining) {
444 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
445 remaining, TCP_RTO_MAX);
446 } else {
447
448
449 tcp_retransmit_timer(sk);
450 }
451
452 break;
453 case ICMP_TIME_EXCEEDED:
454 err = EHOSTUNREACH;
455 break;
456 default:
457 goto out;
458 }
459
460 switch (sk->sk_state) {
461 struct request_sock *req, **prev;
462 case TCP_LISTEN:
463 if (sock_owned_by_user(sk))
464 goto out;
465
466 req = inet_csk_search_req(sk, &prev, th->dest,
467 iph->daddr, iph->saddr);
468 if (!req)
469 goto out;
470
471
472
473
474 WARN_ON(req->sk);
475
476 if (seq != tcp_rsk(req)->snt_isn) {
477 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
478 goto out;
479 }
480
481
482
483
484
485
486
487 inet_csk_reqsk_queue_drop(sk, req, prev);
488 goto out;
489
490 case TCP_SYN_SENT:
491 case TCP_SYN_RECV:
492
493
494 if (!sock_owned_by_user(sk)) {
495 sk->sk_err = err;
496
497 sk->sk_error_report(sk);
498
499 tcp_done(sk);
500 } else {
501 sk->sk_err_soft = err;
502 }
503 goto out;
504 }
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522 inet = inet_sk(sk);
523 if (!sock_owned_by_user(sk) && inet->recverr) {
524 sk->sk_err = err;
525 sk->sk_error_report(sk);
526 } else {
527 sk->sk_err_soft = err;
528 }
529
530out:
531 bh_unlock_sock(sk);
532 sock_put(sk);
533}
534
535static void __tcp_v4_send_check(struct sk_buff *skb,
536 __be32 saddr, __be32 daddr)
537{
538 struct tcphdr *th = tcp_hdr(skb);
539
540 if (skb->ip_summed == CHECKSUM_PARTIAL) {
541 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
542 skb->csum_start = skb_transport_header(skb) - skb->head;
543 skb->csum_offset = offsetof(struct tcphdr, check);
544 } else {
545 th->check = tcp_v4_check(skb->len, saddr, daddr,
546 csum_partial(th,
547 th->doff << 2,
548 skb->csum));
549 }
550}
551
552
553void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
554{
555 const struct inet_sock *inet = inet_sk(sk);
556
557 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
558}
559EXPORT_SYMBOL(tcp_v4_send_check);
560
561int tcp_v4_gso_send_check(struct sk_buff *skb)
562{
563 const struct iphdr *iph;
564 struct tcphdr *th;
565
566 if (!pskb_may_pull(skb, sizeof(*th)))
567 return -EINVAL;
568
569 iph = ip_hdr(skb);
570 th = tcp_hdr(skb);
571
572 th->check = 0;
573 skb->ip_summed = CHECKSUM_PARTIAL;
574 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
575 return 0;
576}
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
592{
593 const struct tcphdr *th = tcp_hdr(skb);
594 struct {
595 struct tcphdr th;
596#ifdef CONFIG_TCP_MD5SIG
597 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
598#endif
599 } rep;
600 struct ip_reply_arg arg;
601#ifdef CONFIG_TCP_MD5SIG
602 struct tcp_md5sig_key *key;
603#endif
604 struct net *net;
605
606
607 if (th->rst)
608 return;
609
610 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
611 return;
612
613
614 memset(&rep, 0, sizeof(rep));
615 rep.th.dest = th->source;
616 rep.th.source = th->dest;
617 rep.th.doff = sizeof(struct tcphdr) / 4;
618 rep.th.rst = 1;
619
620 if (th->ack) {
621 rep.th.seq = th->ack_seq;
622 } else {
623 rep.th.ack = 1;
624 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
625 skb->len - (th->doff << 2));
626 }
627
628 memset(&arg, 0, sizeof(arg));
629 arg.iov[0].iov_base = (unsigned char *)&rep;
630 arg.iov[0].iov_len = sizeof(rep.th);
631
632#ifdef CONFIG_TCP_MD5SIG
633 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
634 if (key) {
635 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
636 (TCPOPT_NOP << 16) |
637 (TCPOPT_MD5SIG << 8) |
638 TCPOLEN_MD5SIG);
639
640 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
641 rep.th.doff = arg.iov[0].iov_len / 4;
642
643 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
644 key, ip_hdr(skb)->saddr,
645 ip_hdr(skb)->daddr, &rep.th);
646 }
647#endif
648 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
649 ip_hdr(skb)->saddr,
650 arg.iov[0].iov_len, IPPROTO_TCP, 0);
651 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
652 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
653
654 net = dev_net(skb_dst(skb)->dev);
655 arg.tos = ip_hdr(skb)->tos;
656 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
657 &arg, arg.iov[0].iov_len);
658
659 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
660 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
661}
662
663
664
665
666
667static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
668 u32 win, u32 ts, int oif,
669 struct tcp_md5sig_key *key,
670 int reply_flags, u8 tos)
671{
672 const struct tcphdr *th = tcp_hdr(skb);
673 struct {
674 struct tcphdr th;
675 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
676#ifdef CONFIG_TCP_MD5SIG
677 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
678#endif
679 ];
680 } rep;
681 struct ip_reply_arg arg;
682 struct net *net = dev_net(skb_dst(skb)->dev);
683
684 memset(&rep.th, 0, sizeof(struct tcphdr));
685 memset(&arg, 0, sizeof(arg));
686
687 arg.iov[0].iov_base = (unsigned char *)&rep;
688 arg.iov[0].iov_len = sizeof(rep.th);
689 if (ts) {
690 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
691 (TCPOPT_TIMESTAMP << 8) |
692 TCPOLEN_TIMESTAMP);
693 rep.opt[1] = htonl(tcp_time_stamp);
694 rep.opt[2] = htonl(ts);
695 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
696 }
697
698
699 rep.th.dest = th->source;
700 rep.th.source = th->dest;
701 rep.th.doff = arg.iov[0].iov_len / 4;
702 rep.th.seq = htonl(seq);
703 rep.th.ack_seq = htonl(ack);
704 rep.th.ack = 1;
705 rep.th.window = htons(win);
706
707#ifdef CONFIG_TCP_MD5SIG
708 if (key) {
709 int offset = (ts) ? 3 : 0;
710
711 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
712 (TCPOPT_NOP << 16) |
713 (TCPOPT_MD5SIG << 8) |
714 TCPOLEN_MD5SIG);
715 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
716 rep.th.doff = arg.iov[0].iov_len/4;
717
718 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
719 key, ip_hdr(skb)->saddr,
720 ip_hdr(skb)->daddr, &rep.th);
721 }
722#endif
723 arg.flags = reply_flags;
724 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
725 ip_hdr(skb)->saddr,
726 arg.iov[0].iov_len, IPPROTO_TCP, 0);
727 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
728 if (oif)
729 arg.bound_dev_if = oif;
730 arg.tos = tos;
731 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
732 &arg, arg.iov[0].iov_len);
733
734 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
735}
736
737static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
738{
739 struct inet_timewait_sock *tw = inet_twsk(sk);
740 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
741
742 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
743 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
744 tcptw->tw_ts_recent,
745 tw->tw_bound_dev_if,
746 tcp_twsk_md5_key(tcptw),
747 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
748 tw->tw_tos
749 );
750
751 inet_twsk_put(tw);
752}
753
754static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
755 struct request_sock *req)
756{
757 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
758 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
759 req->ts_recent,
760 0,
761 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
762 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
763 ip_hdr(skb)->tos);
764}
765
766
767
768
769
770
771static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
772 struct request_sock *req,
773 struct request_values *rvp)
774{
775 const struct inet_request_sock *ireq = inet_rsk(req);
776 struct flowi4 fl4;
777 int err = -1;
778 struct sk_buff * skb;
779
780
781 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
782 return -1;
783
784 skb = tcp_make_synack(sk, dst, req, rvp);
785
786 if (skb) {
787 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
788
789 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
790 ireq->rmt_addr,
791 ireq->opt);
792 err = net_xmit_eval(err);
793 }
794
795 dst_release(dst);
796 return err;
797}
798
799static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
800 struct request_values *rvp)
801{
802 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
803 return tcp_v4_send_synack(sk, NULL, req, rvp);
804}
805
806
807
808
809static void tcp_v4_reqsk_destructor(struct request_sock *req)
810{
811 kfree(inet_rsk(req)->opt);
812}
813
814
815
816
817int tcp_syn_flood_action(struct sock *sk,
818 const struct sk_buff *skb,
819 const char *proto)
820{
821 const char *msg = "Dropping request";
822 int want_cookie = 0;
823 struct listen_sock *lopt;
824
825
826
827#ifdef CONFIG_SYN_COOKIES
828 if (sysctl_tcp_syncookies) {
829 msg = "Sending cookies";
830 want_cookie = 1;
831 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
832 } else
833#endif
834 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
835
836 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
837 if (!lopt->synflood_warned) {
838 lopt->synflood_warned = 1;
839 pr_info("%s: Possible SYN flooding on port %d. %s. "
840 " Check SNMP counters.\n",
841 proto, ntohs(tcp_hdr(skb)->dest), msg);
842 }
843 return want_cookie;
844}
845EXPORT_SYMBOL(tcp_syn_flood_action);
846
847
848
849
850static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
851 struct sk_buff *skb)
852{
853 const struct ip_options *opt = &(IPCB(skb)->opt);
854 struct ip_options_rcu *dopt = NULL;
855
856 if (opt && opt->optlen) {
857 int opt_size = sizeof(*dopt) + opt->optlen;
858
859 dopt = kmalloc(opt_size, GFP_ATOMIC);
860 if (dopt) {
861 if (ip_options_echo(&dopt->opt, skb)) {
862 kfree(dopt);
863 dopt = NULL;
864 }
865 }
866 }
867 return dopt;
868}
869
870#ifdef CONFIG_TCP_MD5SIG
871
872
873
874
875
876
877
878static struct tcp_md5sig_key *
879 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
880{
881 struct tcp_sock *tp = tcp_sk(sk);
882 int i;
883
884 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
885 return NULL;
886 for (i = 0; i < tp->md5sig_info->entries4; i++) {
887 if (tp->md5sig_info->keys4[i].addr == addr)
888 return &tp->md5sig_info->keys4[i].base;
889 }
890 return NULL;
891}
892
893struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
894 struct sock *addr_sk)
895{
896 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
897}
898EXPORT_SYMBOL(tcp_v4_md5_lookup);
899
900static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
901 struct request_sock *req)
902{
903 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
904}
905
906
907int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
908 u8 *newkey, u8 newkeylen)
909{
910
911 struct tcp_md5sig_key *key;
912 struct tcp_sock *tp = tcp_sk(sk);
913 struct tcp4_md5sig_key *keys;
914
915 key = tcp_v4_md5_do_lookup(sk, addr);
916 if (key) {
917
918 kfree(key->key);
919 key->key = newkey;
920 key->keylen = newkeylen;
921 } else {
922 struct tcp_md5sig_info *md5sig;
923
924 if (!tp->md5sig_info) {
925 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
926 GFP_ATOMIC);
927 if (!tp->md5sig_info) {
928 kfree(newkey);
929 return -ENOMEM;
930 }
931 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
932 }
933
934 md5sig = tp->md5sig_info;
935 if (md5sig->entries4 == 0 &&
936 tcp_alloc_md5sig_pool(sk) == NULL) {
937 kfree(newkey);
938 return -ENOMEM;
939 }
940
941 if (md5sig->alloced4 == md5sig->entries4) {
942 keys = kmalloc((sizeof(*keys) *
943 (md5sig->entries4 + 1)), GFP_ATOMIC);
944 if (!keys) {
945 kfree(newkey);
946 if (md5sig->entries4 == 0)
947 tcp_free_md5sig_pool();
948 return -ENOMEM;
949 }
950
951 if (md5sig->entries4)
952 memcpy(keys, md5sig->keys4,
953 sizeof(*keys) * md5sig->entries4);
954
955
956 kfree(md5sig->keys4);
957 md5sig->keys4 = keys;
958 md5sig->alloced4++;
959 }
960 md5sig->entries4++;
961 md5sig->keys4[md5sig->entries4 - 1].addr = addr;
962 md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
963 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
964 }
965 return 0;
966}
967EXPORT_SYMBOL(tcp_v4_md5_do_add);
968
969static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
970 u8 *newkey, u8 newkeylen)
971{
972 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
973 newkey, newkeylen);
974}
975
976int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
977{
978 struct tcp_sock *tp = tcp_sk(sk);
979 int i;
980
981 for (i = 0; i < tp->md5sig_info->entries4; i++) {
982 if (tp->md5sig_info->keys4[i].addr == addr) {
983
984 kfree(tp->md5sig_info->keys4[i].base.key);
985 tp->md5sig_info->entries4--;
986
987 if (tp->md5sig_info->entries4 == 0) {
988 kfree(tp->md5sig_info->keys4);
989 tp->md5sig_info->keys4 = NULL;
990 tp->md5sig_info->alloced4 = 0;
991 tcp_free_md5sig_pool();
992 } else if (tp->md5sig_info->entries4 != i) {
993
994 memmove(&tp->md5sig_info->keys4[i],
995 &tp->md5sig_info->keys4[i+1],
996 (tp->md5sig_info->entries4 - i) *
997 sizeof(struct tcp4_md5sig_key));
998 }
999 return 0;
1000 }
1001 }
1002 return -ENOENT;
1003}
1004EXPORT_SYMBOL(tcp_v4_md5_do_del);
1005
1006static void tcp_v4_clear_md5_list(struct sock *sk)
1007{
1008 struct tcp_sock *tp = tcp_sk(sk);
1009
1010
1011
1012
1013
1014 if (tp->md5sig_info->entries4) {
1015 int i;
1016 for (i = 0; i < tp->md5sig_info->entries4; i++)
1017 kfree(tp->md5sig_info->keys4[i].base.key);
1018 tp->md5sig_info->entries4 = 0;
1019 tcp_free_md5sig_pool();
1020 }
1021 if (tp->md5sig_info->keys4) {
1022 kfree(tp->md5sig_info->keys4);
1023 tp->md5sig_info->keys4 = NULL;
1024 tp->md5sig_info->alloced4 = 0;
1025 }
1026}
1027
1028static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1029 int optlen)
1030{
1031 struct tcp_md5sig cmd;
1032 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1033 u8 *newkey;
1034
1035 if (optlen < sizeof(cmd))
1036 return -EINVAL;
1037
1038 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1039 return -EFAULT;
1040
1041 if (sin->sin_family != AF_INET)
1042 return -EINVAL;
1043
1044 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1045 if (!tcp_sk(sk)->md5sig_info)
1046 return -ENOENT;
1047 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1048 }
1049
1050 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1051 return -EINVAL;
1052
1053 if (!tcp_sk(sk)->md5sig_info) {
1054 struct tcp_sock *tp = tcp_sk(sk);
1055 struct tcp_md5sig_info *p;
1056
1057 p = kzalloc(sizeof(*p), sk->sk_allocation);
1058 if (!p)
1059 return -EINVAL;
1060
1061 tp->md5sig_info = p;
1062 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1063 }
1064
1065 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1066 if (!newkey)
1067 return -ENOMEM;
1068 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1069 newkey, cmd.tcpm_keylen);
1070}
1071
1072static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1073 __be32 daddr, __be32 saddr, int nbytes)
1074{
1075 struct tcp4_pseudohdr *bp;
1076 struct scatterlist sg;
1077
1078 bp = &hp->md5_blk.ip4;
1079
1080
1081
1082
1083
1084
1085 bp->saddr = saddr;
1086 bp->daddr = daddr;
1087 bp->pad = 0;
1088 bp->protocol = IPPROTO_TCP;
1089 bp->len = cpu_to_be16(nbytes);
1090
1091 sg_init_one(&sg, bp, sizeof(*bp));
1092 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1093}
1094
1095static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1096 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1097{
1098 struct tcp_md5sig_pool *hp;
1099 struct hash_desc *desc;
1100
1101 hp = tcp_get_md5sig_pool();
1102 if (!hp)
1103 goto clear_hash_noput;
1104 desc = &hp->md5_desc;
1105
1106 if (crypto_hash_init(desc))
1107 goto clear_hash;
1108 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1109 goto clear_hash;
1110 if (tcp_md5_hash_header(hp, th))
1111 goto clear_hash;
1112 if (tcp_md5_hash_key(hp, key))
1113 goto clear_hash;
1114 if (crypto_hash_final(desc, md5_hash))
1115 goto clear_hash;
1116
1117 tcp_put_md5sig_pool();
1118 return 0;
1119
1120clear_hash:
1121 tcp_put_md5sig_pool();
1122clear_hash_noput:
1123 memset(md5_hash, 0, 16);
1124 return 1;
1125}
1126
1127int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1128 const struct sock *sk, const struct request_sock *req,
1129 const struct sk_buff *skb)
1130{
1131 struct tcp_md5sig_pool *hp;
1132 struct hash_desc *desc;
1133 const struct tcphdr *th = tcp_hdr(skb);
1134 __be32 saddr, daddr;
1135
1136 if (sk) {
1137 saddr = inet_sk(sk)->inet_saddr;
1138 daddr = inet_sk(sk)->inet_daddr;
1139 } else if (req) {
1140 saddr = inet_rsk(req)->loc_addr;
1141 daddr = inet_rsk(req)->rmt_addr;
1142 } else {
1143 const struct iphdr *iph = ip_hdr(skb);
1144 saddr = iph->saddr;
1145 daddr = iph->daddr;
1146 }
1147
1148 hp = tcp_get_md5sig_pool();
1149 if (!hp)
1150 goto clear_hash_noput;
1151 desc = &hp->md5_desc;
1152
1153 if (crypto_hash_init(desc))
1154 goto clear_hash;
1155
1156 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1157 goto clear_hash;
1158 if (tcp_md5_hash_header(hp, th))
1159 goto clear_hash;
1160 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1161 goto clear_hash;
1162 if (tcp_md5_hash_key(hp, key))
1163 goto clear_hash;
1164 if (crypto_hash_final(desc, md5_hash))
1165 goto clear_hash;
1166
1167 tcp_put_md5sig_pool();
1168 return 0;
1169
1170clear_hash:
1171 tcp_put_md5sig_pool();
1172clear_hash_noput:
1173 memset(md5_hash, 0, 16);
1174 return 1;
1175}
1176EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1177
1178static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1179{
1180
1181
1182
1183
1184
1185
1186
1187
1188 const __u8 *hash_location = NULL;
1189 struct tcp_md5sig_key *hash_expected;
1190 const struct iphdr *iph = ip_hdr(skb);
1191 const struct tcphdr *th = tcp_hdr(skb);
1192 int genhash;
1193 unsigned char newhash[16];
1194
1195 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1196 hash_location = tcp_parse_md5sig_option(th);
1197
1198
1199 if (!hash_expected && !hash_location)
1200 return 0;
1201
1202 if (hash_expected && !hash_location) {
1203 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1204 return 1;
1205 }
1206
1207 if (!hash_expected && hash_location) {
1208 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1209 return 1;
1210 }
1211
1212
1213
1214
1215 genhash = tcp_v4_md5_hash_skb(newhash,
1216 hash_expected,
1217 NULL, NULL, skb);
1218
1219 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1220 if (net_ratelimit()) {
1221 printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1222 &iph->saddr, ntohs(th->source),
1223 &iph->daddr, ntohs(th->dest),
1224 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1225 }
1226 return 1;
1227 }
1228 return 0;
1229}
1230
1231#endif
1232
1233struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1234 .family = PF_INET,
1235 .obj_size = sizeof(struct tcp_request_sock),
1236 .rtx_syn_ack = tcp_v4_rtx_synack,
1237 .send_ack = tcp_v4_reqsk_send_ack,
1238 .destructor = tcp_v4_reqsk_destructor,
1239 .send_reset = tcp_v4_send_reset,
1240 .syn_ack_timeout = tcp_syn_ack_timeout,
1241};
1242
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 MD5 hooks for request socks; installed as af_specific on each
 * request in tcp_v4_conn_request().
 */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_lookup	= tcp_v4_reqsk_md5_lookup,
};
#endif
1249
1250int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1251{
1252 struct tcp_extend_values tmp_ext;
1253 struct tcp_options_received tmp_opt;
1254 const u8 *hash_location;
1255 struct request_sock *req;
1256 struct inet_request_sock *ireq;
1257 struct tcp_sock *tp = tcp_sk(sk);
1258 struct dst_entry *dst = NULL;
1259 __be32 saddr = ip_hdr(skb)->saddr;
1260 __be32 daddr = ip_hdr(skb)->daddr;
1261 __u32 isn = TCP_SKB_CB(skb)->when;
1262 int want_cookie = 0;
1263
1264
1265 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1266 goto drop;
1267
1268
1269
1270
1271
1272 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1273 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1274 if (!want_cookie)
1275 goto drop;
1276 }
1277
1278
1279
1280
1281
1282
1283 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1284 goto drop;
1285
1286 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1287 if (!req)
1288 goto drop;
1289
1290#ifdef CONFIG_TCP_MD5SIG
1291 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1292#endif
1293
1294 tcp_clear_options(&tmp_opt);
1295 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1296 tmp_opt.user_mss = tp->rx_opt.user_mss;
1297 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1298
1299 if (tmp_opt.cookie_plus > 0 &&
1300 tmp_opt.saw_tstamp &&
1301 !tp->rx_opt.cookie_out_never &&
1302 (sysctl_tcp_cookie_size > 0 ||
1303 (tp->cookie_values != NULL &&
1304 tp->cookie_values->cookie_desired > 0))) {
1305 u8 *c;
1306 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1307 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1308
1309 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1310 goto drop_and_release;
1311
1312
1313 *mess++ ^= (__force u32)daddr;
1314 *mess++ ^= (__force u32)saddr;
1315
1316
1317 c = (u8 *)mess;
1318 while (l-- > 0)
1319 *c++ ^= *hash_location++;
1320
1321 want_cookie = 0;
1322 tmp_ext.cookie_out_never = 0;
1323 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1324 } else if (!tp->rx_opt.cookie_in_always) {
1325
1326 tmp_ext.cookie_out_never = 1;
1327 tmp_ext.cookie_plus = 0;
1328 } else {
1329 goto drop_and_release;
1330 }
1331 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1332
1333 if (want_cookie && !tmp_opt.saw_tstamp)
1334 tcp_clear_options(&tmp_opt);
1335
1336 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1337 tcp_openreq_init(req, &tmp_opt, skb);
1338
1339 ireq = inet_rsk(req);
1340 ireq->loc_addr = daddr;
1341 ireq->rmt_addr = saddr;
1342 ireq->no_srccheck = inet_sk(sk)->transparent;
1343 ireq->opt = tcp_v4_save_options(sk, skb);
1344
1345 if (security_inet_conn_request(sk, skb, req))
1346 goto drop_and_free;
1347
1348 if (!want_cookie || tmp_opt.tstamp_ok)
1349 TCP_ECN_create_request(req, tcp_hdr(skb));
1350
1351 if (want_cookie) {
1352 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1353 req->cookie_ts = tmp_opt.tstamp_ok;
1354 } else if (!isn) {
1355 struct inet_peer *peer = NULL;
1356 struct flowi4 fl4;
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367 if (tmp_opt.saw_tstamp &&
1368 tcp_death_row.sysctl_tw_recycle &&
1369 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1370 fl4.daddr == saddr &&
1371 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1372 inet_peer_refcheck(peer);
1373 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1374 (s32)(peer->tcp_ts - req->ts_recent) >
1375 TCP_PAWS_WINDOW) {
1376 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1377 goto drop_and_release;
1378 }
1379 }
1380
1381 else if (!sysctl_tcp_syncookies &&
1382 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1383 (sysctl_max_syn_backlog >> 2)) &&
1384 (!peer || !peer->tcp_ts_stamp) &&
1385 (!dst || !dst_metric(dst, RTAX_RTT))) {
1386
1387
1388
1389
1390
1391
1392
1393 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1394 &saddr, ntohs(tcp_hdr(skb)->source));
1395 goto drop_and_release;
1396 }
1397
1398 isn = tcp_v4_init_sequence(skb);
1399 }
1400 tcp_rsk(req)->snt_isn = isn;
1401 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1402
1403 if (tcp_v4_send_synack(sk, dst, req,
1404 (struct request_values *)&tmp_ext) ||
1405 want_cookie)
1406 goto drop_and_free;
1407
1408 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1409 return 0;
1410
1411drop_and_release:
1412 dst_release(dst);
1413drop_and_free:
1414 reqsk_free(req);
1415drop:
1416 return 0;
1417}
1418EXPORT_SYMBOL(tcp_v4_conn_request);
1419
1420
1421
1422
1423
1424
/*
 * Build the child socket for connection request @req on listener @sk.
 *
 * @sk:  the listening socket
 * @skb: the received segment; its incoming interface and IP TTL seed the
 *       child's mc_index and mc_ttl
 * @req: the request being promoted; its saved IP options are moved to the
 *       child and req's own pointer is cleared
 * @dst: route to use for the child, or NULL to have one looked up here
 *
 * Returns the child after __inet_hash_nolisten(), or NULL on failure.
 * Every failure path increments LINUX_MIB_LISTENDROPS; a full accept queue
 * additionally increments LINUX_MIB_LISTENOVERFLOWS.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	/* Copy the addressing recorded in the request into the child. */
	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->inet_daddr = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr = ireq->loc_addr;
	/* Hand the saved IP options over; the request no longer refers to them. */
	inet_opt = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	/* Only look up a route when the caller did not supply one. */
	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
		goto put_and_exit;

	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	/* A smaller user_mss set on the listener overrides the route's value. */
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	/* Feed the time since the SYN-ACK was stamped in as an RTT sample. */
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy the listener's MD5 key for this peer, if it has one. */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * Duplicate the key for the child.  If the allocation fails
		 * the key is simply not copied; GSO is turned off for the
		 * child either way.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	/* No child exists on these paths, so drop the caller's route here. */
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	/*
	 * A child was created: stop its timers, unlock it and drop a
	 * reference.  NOTE(review): the unlock implies the child comes back
	 * from tcp_create_openreq_child() bh-locked - confirm there.
	 */
	tcp_clear_xmit_timers(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1519
1520static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1521{
1522 struct tcphdr *th = tcp_hdr(skb);
1523 const struct iphdr *iph = ip_hdr(skb);
1524 struct sock *nsk;
1525 struct request_sock **prev;
1526
1527 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1528 iph->saddr, iph->daddr);
1529 if (req)
1530 return tcp_check_req(sk, skb, req, prev);
1531
1532 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1533 th->source, iph->daddr, th->dest, inet_iif(skb));
1534
1535 if (nsk) {
1536 if (nsk->sk_state != TCP_TIME_WAIT) {
1537 bh_lock_sock(nsk);
1538 return nsk;
1539 }
1540 inet_twsk_put(inet_twsk(nsk));
1541 return NULL;
1542 }
1543
1544#ifdef CONFIG_SYN_COOKIES
1545 if (!th->syn)
1546 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1547#endif
1548 return sk;
1549}
1550
1551static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1552{
1553 const struct iphdr *iph = ip_hdr(skb);
1554
1555 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1556 if (!tcp_v4_check(skb->len, iph->saddr,
1557 iph->daddr, skb->csum)) {
1558 skb->ip_summed = CHECKSUM_UNNECESSARY;
1559 return 0;
1560 }
1561 }
1562
1563 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1564 skb->len, IPPROTO_TCP, 0);
1565
1566 if (skb->len <= 76) {
1567 return __skb_checksum_complete(skb);
1568 }
1569 return 0;
1570}
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
/*
 * Process one segment @skb for socket @sk according to the socket state.
 *
 * Established sockets take the tcp_rcv_established() path; listeners are
 * first resolved through tcp_v4_hnd_req() and may hand the segment to a
 * child; everything else goes through tcp_rcv_state_process().
 *
 * Always returns 0.  When a handler reports failure a reset is sent on
 * behalf of the socket that failed and the skb is freed here; the skb is
 * also freed on the discard and checksum-error paths.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * Drop the segment without a reset when
	 * tcp_v4_inbound_md5_hash() returns non-zero.
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	/* Established sockets are handled before the length/checksum test. */
	if (sk->sk_state == TCP_ESTABLISHED) {
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* A different socket was found: let it process the segment. */
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	/* rsk is whichever socket's handler reported the failure. */
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* The return value stays 0 even though the segment was dropped. */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
1645
1646
1647
1648
1649
/*
 * Receive entry point for an IPv4 TCP segment.
 *
 * Validates the header and checksum, fills in TCP_SKB_CB(), looks up the
 * owning socket and then either processes the segment directly, queues it
 * on the prequeue, or adds it to the socket backlog when the socket is
 * owned by user context.  TIME_WAIT sockets and segments without a socket
 * are handled at the labels at the bottom.
 *
 * Returns the tcp_v4_do_rcv() result when it is called from here, and 0 on
 * every other path.  Dropped segments are freed at discard_it.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	/* Ignore anything whose pkt_type is not PACKET_HOST. */
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Counted before validation, so malformed segments are included. */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	/* A data offset shorter than the fixed header is malformed. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/*
	 * Set up checksum state unless the skb already says no check is
	 * needed; a non-zero result from tcp_v4_checksum_init() is treated
	 * as a bad packet.
	 */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	/*
	 * Fetch the header pointers again - presumably because the pulls
	 * above may have moved the data; confirm against pskb_may_pull().
	 */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* SYN and FIN each add one to the end sequence, as does every payload byte. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

	/* Also re-entered from do_time_wait with sk replaced by a listener. */
process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		/* With a DMA copy channel in use the prequeue is bypassed. */
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			/* Process right away unless tcp_prequeue() took the skb. */
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		/* The backlog refused the skb: unlock before dropping it. */
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	/*
	 * bad_packet sits inside this if-body so that jumps from the header
	 * checks above share the error accounting and skip the reset.
	 */
	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Free the segment; nothing else refers to it on these paths. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	/* sk is a timewait socket here; its reference is dropped with inet_twsk_put(). */
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			/* Retire the timewait socket and restart with the listener. */
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* No listener found: fall through and answer with an ACK. */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1793
1794struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1795{
1796 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1797 struct inet_sock *inet = inet_sk(sk);
1798 struct inet_peer *peer;
1799
1800 if (!rt ||
1801 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1802 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1803 *release_it = true;
1804 } else {
1805 if (!rt->peer)
1806 rt_bind_peer(rt, inet->inet_daddr, 1);
1807 peer = rt->peer;
1808 *release_it = false;
1809 }
1810
1811 return peer;
1812}
1813EXPORT_SYMBOL(tcp_v4_get_peer);
1814
1815void *tcp_v4_tw_get_peer(struct sock *sk)
1816{
1817 const struct inet_timewait_sock *tw = inet_twsk(sk);
1818
1819 return inet_getpeer_v4(tw->tw_daddr, 1);
1820}
1821EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1822
/* Timewait socket operations for TCP; referenced by tcp_prot.twsk_prot. */
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size = sizeof(struct tcp_timewait_sock),
	.twsk_unique = tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer = tcp_v4_tw_get_peer,
};
1829
/*
 * IPv4 address-family operations for connection sockets.
 * tcp_v4_init_sock() installs this table as icsk->icsk_af_ops.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit = ip_queue_xmit,
	.send_check = tcp_v4_send_check,
	.rebuild_header = inet_sk_rebuild_header,
	.conn_request = tcp_v4_conn_request,
	.syn_recv_sock = tcp_v4_syn_recv_sock,
	.get_peer = tcp_v4_get_peer,
	.net_header_len = sizeof(struct iphdr),
	.setsockopt = ip_setsockopt,
	.getsockopt = ip_getsockopt,
	.addr2sockaddr = inet_csk_addr2sockaddr,
	.sockaddr_len = sizeof(struct sockaddr_in),
	.bind_conflict = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);
1849
#ifdef CONFIG_TCP_MD5SIG
/*
 * IPv4 MD5 signature operations.
 * tcp_v4_init_sock() installs this table as tp->af_specific.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup = tcp_v4_md5_lookup,
	.calc_md5_hash = tcp_v4_md5_hash_skb,
	.md5_add = tcp_v4_md5_add_func,
	.md5_parse = tcp_v4_parse_md5_keys,
};
#endif
1858
1859
1860
1861
/*
 * Initialise the TCP-specific state of a freshly created IPv4 socket.
 * Installed as tcp_prot.init.  Always returns 0; a failed cookie_values
 * allocation is tolerated and leaves the pointer NULL.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	/* Both the RTO and the deviation start at TCP_TIMEOUT_INIT. */
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* Initial congestion window. */
	tp->snd_cwnd = TCP_INIT_CWND;

	/* Start with the slow-start threshold and window clamp at their maxima. */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	/* Hook up the IPv4 address-family operations. */
	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* Cookie state is only allocated when the sysctl asks for it. */
	if (sysctl_tcp_cookie_size > 0) {
		/* Allocation failure is not an error; the pointer stays NULL. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}

	/* Default buffer sizes come from the middle sysctl entries. */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	/* The counter update is bracketed by a bottom-half disable/enable. */
	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}
1924
/*
 * Release the TCP-specific resources of @sk: timers, congestion control
 * state, queued skbs, MD5 keys, the bound port, the cached sendmsg page and
 * the cookie values.  Also decrements tcp_sockets_allocated.  Installed as
 * tcp_prot.destroy.
 */
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Drop everything still waiting to be sent. */
	tcp_write_queue_purge(sk);

	/* Drop any out-of-order segments still queued. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Free the MD5 key list and its container, if one was set up. */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Drop skbs parked on the async wait queue. */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Drop anything left on the prequeue. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Give the port back if the socket is still bound. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/* Free the cached sendmsg page, if any. */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* Drop this socket's reference on the cookie values. */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	percpu_counter_dec(&tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
1978
1979#ifdef CONFIG_PROC_FS
1980
1981
1982static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1983{
1984 return hlist_nulls_empty(head) ? NULL :
1985 list_entry(head->first, struct inet_timewait_sock, tw_node);
1986}
1987
1988static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1989{
1990 return !is_a_nulls(tw->tw_node.next) ?
1991 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1992}
1993
1994
1995
1996
1997
1998
/*
 * Advance the /proc iterator over listening sockets and their pending
 * connection requests.
 *
 * @cur is the entry returned last time, or NULL to start at the head of
 * bucket st->bucket.  Returns the next matching entry: a struct sock while
 * st->state is TCP_SEQ_STATE_LISTENING, a struct request_sock while it is
 * TCP_SEQ_STATE_OPENREQ, or NULL once every bucket has been walked.
 *
 * Locking: a returned socket leaves the current bucket lock held; a
 * returned request additionally leaves the parent's syn_wait_lock
 * read-held.  On the NULL return the bucket lock has been released.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	/* Fresh start: lock the current bucket and scan from its head. */
	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is a request: continue through the parent's SYN table. */
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
			/* Also entered from start_req with st->sbucket set to 0. */
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* SYN table exhausted: move on to the socket after the parent. */
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		/* cur is a listener: walk its requests before its successor. */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		/*
		 * Wrong family for this file, but its pending requests are
		 * still checked against st->family one by one.
		 */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			/* Remember the parent; syn_wait_lock stays read-held. */
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	/* Bucket finished: release it and try the next one. */
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
2080
2081static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2082{
2083 struct tcp_iter_state *st = seq->private;
2084 void *rc;
2085
2086 st->bucket = 0;
2087 st->offset = 0;
2088 rc = listening_get_next(seq, NULL);
2089
2090 while (rc && *pos) {
2091 rc = listening_get_next(seq, rc);
2092 --*pos;
2093 }
2094 return rc;
2095}
2096
2097static inline int empty_bucket(struct tcp_iter_state *st)
2098{
2099 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2100 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2101}
2102
2103
2104
2105
2106
/*
 * Find the first established or timewait entry matching the iterator's
 * family and namespace, scanning ehash buckets from st->bucket upwards.
 *
 * Returns a struct sock (st->state left as set by the caller) or a
 * struct inet_timewait_sock (st->state set to TCP_SEQ_STATE_TIME_WAIT)
 * with that bucket's lock held, or NULL with no lock held when nothing
 * matches.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Unlocked peek so empty buckets are skipped without locking. */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		/* No established match in this bucket: try its timewait chain. */
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
2149
/*
 * Advance the /proc iterator through the established hash.
 *
 * @cur is the entry returned last time: a struct inet_timewait_sock when
 * st->state is TCP_SEQ_STATE_TIME_WAIT, otherwise a struct sock.  Each
 * bucket's established chain is walked before its timewait chain.
 *
 * Returns the next matching entry with its bucket lock held, or NULL with
 * no lock held once the table is exhausted.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
		/* Also entered from below once the established chain runs out. */
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/* Bucket done: release it before looking for the next one. */
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Skip empty buckets without taking their locks. */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
		       empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	/* Established chain exhausted: switch to this bucket's timewait chain. */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
2201
2202static void *established_get_idx(struct seq_file *seq, loff_t pos)
2203{
2204 struct tcp_iter_state *st = seq->private;
2205 void *rc;
2206
2207 st->bucket = 0;
2208 rc = established_get_first(seq);
2209
2210 while (rc && pos) {
2211 rc = established_get_next(seq, rc);
2212 --pos;
2213 }
2214 return rc;
2215}
2216
2217static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2218{
2219 void *rc;
2220 struct tcp_iter_state *st = seq->private;
2221
2222 st->state = TCP_SEQ_STATE_LISTENING;
2223 rc = listening_get_idx(seq, &pos);
2224
2225 if (!rc) {
2226 st->state = TCP_SEQ_STATE_ESTABLISHED;
2227 rc = established_get_idx(seq, pos);
2228 }
2229
2230 return rc;
2231}
2232
/*
 * Re-position the iterator using the bucket, offset and state left in
 * @seq's private data, instead of walking from the start of the tables.
 *
 * Returns the entry found, or NULL when nothing is found from that point
 * on.  st->num is restored before returning because the get_next helpers
 * increment it while seeking.
 */
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	/* Snapshot first: the helpers below reset st->offset. */
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Listening table exhausted: fall through to the established hash. */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}
2267
2268static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2269{
2270 struct tcp_iter_state *st = seq->private;
2271 void *rc;
2272
2273 if (*pos && *pos == st->last_pos) {
2274 rc = tcp_seek_last_pos(seq);
2275 if (rc)
2276 goto out;
2277 }
2278
2279 st->state = TCP_SEQ_STATE_LISTENING;
2280 st->num = 0;
2281 st->bucket = 0;
2282 st->offset = 0;
2283 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2284
2285out:
2286 st->last_pos = *pos;
2287 return rc;
2288}
2289
2290static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2291{
2292 struct tcp_iter_state *st = seq->private;
2293 void *rc = NULL;
2294
2295 if (v == SEQ_START_TOKEN) {
2296 rc = tcp_get_idx(seq, 0);
2297 goto out;
2298 }
2299
2300 switch (st->state) {
2301 case TCP_SEQ_STATE_OPENREQ:
2302 case TCP_SEQ_STATE_LISTENING:
2303 rc = listening_get_next(seq, v);
2304 if (!rc) {
2305 st->state = TCP_SEQ_STATE_ESTABLISHED;
2306 st->bucket = 0;
2307 st->offset = 0;
2308 rc = established_get_first(seq);
2309 }
2310 break;
2311 case TCP_SEQ_STATE_ESTABLISHED:
2312 case TCP_SEQ_STATE_TIME_WAIT:
2313 rc = established_get_next(seq, v);
2314 break;
2315 }
2316out:
2317 ++*pos;
2318 st->last_pos = *pos;
2319 return rc;
2320}
2321
/*
 * seq_file ->stop handler: release the locks that the get_first/get_next
 * helpers left held for entry @v, chosen by the iterator state.
 */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		/* A returned request leaves its parent's syn_wait_lock read-held. */
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* Fall through: the listening bucket lock is released as well. */
	case TCP_SEQ_STATE_LISTENING:
		/* The header token was produced without taking any lock. */
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
2343
2344int tcp_seq_open(struct inode *inode, struct file *file)
2345{
2346 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2347 struct tcp_iter_state *s;
2348 int err;
2349
2350 err = seq_open_net(inode, file, &afinfo->seq_ops,
2351 sizeof(struct tcp_iter_state));
2352 if (err < 0)
2353 return err;
2354
2355 s = ((struct seq_file *)file->private_data)->private;
2356 s->family = afinfo->family;
2357 s->last_pos = 0;
2358 return 0;
2359}
2360EXPORT_SYMBOL(tcp_seq_open);
2361
2362int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2363{
2364 int rc = 0;
2365 struct proc_dir_entry *p;
2366
2367 afinfo->seq_ops.start = tcp_seq_start;
2368 afinfo->seq_ops.next = tcp_seq_next;
2369 afinfo->seq_ops.stop = tcp_seq_stop;
2370
2371 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2372 afinfo->seq_fops, afinfo);
2373 if (!p)
2374 rc = -ENOMEM;
2375 return rc;
2376}
2377EXPORT_SYMBOL(tcp_proc_register);
2378
/* Remove the /proc entry named afinfo->name from namespace @net. */
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	proc_net_remove(net, afinfo->name);
}
EXPORT_SYMBOL(tcp_proc_unregister);
2384
2385static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2386 struct seq_file *f, int i, int uid, int *len)
2387{
2388 const struct inet_request_sock *ireq = inet_rsk(req);
2389 int ttd = req->expires - jiffies;
2390
2391 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2392 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2393 i,
2394 ireq->loc_addr,
2395 ntohs(inet_sk(sk)->inet_sport),
2396 ireq->rmt_addr,
2397 ntohs(ireq->rmt_port),
2398 TCP_SYN_RECV,
2399 0, 0,
2400 1,
2401 jiffies_to_clock_t(ttd),
2402 req->retrans,
2403 uid,
2404 0,
2405 0,
2406 atomic_read(&sk->sk_refcnt),
2407 req,
2408 len);
2409}
2410
2411static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2412{
2413 int timer_active;
2414 unsigned long timer_expires;
2415 const struct tcp_sock *tp = tcp_sk(sk);
2416 const struct inet_connection_sock *icsk = inet_csk(sk);
2417 const struct inet_sock *inet = inet_sk(sk);
2418 __be32 dest = inet->inet_daddr;
2419 __be32 src = inet->inet_rcv_saddr;
2420 __u16 destp = ntohs(inet->inet_dport);
2421 __u16 srcp = ntohs(inet->inet_sport);
2422 int rx_queue;
2423
2424 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2425 timer_active = 1;
2426 timer_expires = icsk->icsk_timeout;
2427 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2428 timer_active = 4;
2429 timer_expires = icsk->icsk_timeout;
2430 } else if (timer_pending(&sk->sk_timer)) {
2431 timer_active = 2;
2432 timer_expires = sk->sk_timer.expires;
2433 } else {
2434 timer_active = 0;
2435 timer_expires = jiffies;
2436 }
2437
2438 if (sk->sk_state == TCP_LISTEN)
2439 rx_queue = sk->sk_ack_backlog;
2440 else
2441
2442
2443
2444 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2445
2446 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2447 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2448 i, src, srcp, dest, destp, sk->sk_state,
2449 tp->write_seq - tp->snd_una,
2450 rx_queue,
2451 timer_active,
2452 jiffies_to_clock_t(timer_expires - jiffies),
2453 icsk->icsk_retransmits,
2454 sock_i_uid(sk),
2455 icsk->icsk_probes_out,
2456 sock_i_ino(sk),
2457 atomic_read(&sk->sk_refcnt), sk,
2458 jiffies_to_clock_t(icsk->icsk_rto),
2459 jiffies_to_clock_t(icsk->icsk_ack.ato),
2460 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2461 tp->snd_cwnd,
2462 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2463 len);
2464}
2465
2466static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2467 struct seq_file *f, int i, int *len)
2468{
2469 __be32 dest, src;
2470 __u16 destp, srcp;
2471 int ttd = tw->tw_ttd - jiffies;
2472
2473 if (ttd < 0)
2474 ttd = 0;
2475
2476 dest = tw->tw_daddr;
2477 src = tw->tw_rcv_saddr;
2478 destp = ntohs(tw->tw_dport);
2479 srcp = ntohs(tw->tw_sport);
2480
2481 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2482 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2483 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2484 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2485 atomic_read(&tw->tw_refcnt), tw, len);
2486}
2487
2488#define TMPSZ 150
2489
2490static int tcp4_seq_show(struct seq_file *seq, void *v)
2491{
2492 struct tcp_iter_state *st;
2493 int len;
2494
2495 if (v == SEQ_START_TOKEN) {
2496 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2497 " sl local_address rem_address st tx_queue "
2498 "rx_queue tr tm->when retrnsmt uid timeout "
2499 "inode");
2500 goto out;
2501 }
2502 st = seq->private;
2503
2504 switch (st->state) {
2505 case TCP_SEQ_STATE_LISTENING:
2506 case TCP_SEQ_STATE_ESTABLISHED:
2507 get_tcp4_sock(v, seq, st->num, &len);
2508 break;
2509 case TCP_SEQ_STATE_OPENREQ:
2510 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2511 break;
2512 case TCP_SEQ_STATE_TIME_WAIT:
2513 get_timewait4_sock(v, seq, st->num, &len);
2514 break;
2515 }
2516 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2517out:
2518 return 0;
2519}
2520
/* File operations for the TCP /proc file; referenced by tcp4_seq_afinfo. */
static const struct file_operations tcp_afinfo_seq_fops = {
	.owner = THIS_MODULE,
	.open = tcp_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net
};
2528
/*
 * Description of the AF_INET "tcp" proc file.  Only ->show is set here;
 * tcp_proc_register() fills in the start/next/stop callbacks.
 */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name = "tcp",
	.family = AF_INET,
	.seq_fops = &tcp_afinfo_seq_fops,
	.seq_ops = {
		.show = tcp4_seq_show,
	},
};
2537
/* Per-namespace init: register the IPv4 TCP proc file in @net. */
static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}
2542
/* Per-namespace exit: remove the IPv4 TCP proc file from @net. */
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}
2547
/* Per-namespace hooks that create and remove the IPv4 TCP proc file. */
static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
2552
/*
 * Register the per-namespace proc hooks.  Returns the result of
 * register_pernet_subsys().
 */
int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}
2557
/* Unregister the per-namespace proc hooks set up by tcp4_proc_init(). */
void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
2562#endif
2563
2564struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2565{
2566 const struct iphdr *iph = skb_gro_network_header(skb);
2567
2568 switch (skb->ip_summed) {
2569 case CHECKSUM_COMPLETE:
2570 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2571 skb->csum)) {
2572 skb->ip_summed = CHECKSUM_UNNECESSARY;
2573 break;
2574 }
2575
2576
2577 case CHECKSUM_NONE:
2578 NAPI_GRO_CB(skb)->flush = 1;
2579 return NULL;
2580 }
2581
2582 return tcp_gro_receive(head, skb);
2583}
2584
2585int tcp4_gro_complete(struct sk_buff *skb)
2586{
2587 const struct iphdr *iph = ip_hdr(skb);
2588 struct tcphdr *th = tcp_hdr(skb);
2589
2590 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2591 iph->saddr, iph->daddr, 0);
2592 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2593
2594 return tcp_gro_complete(skb);
2595}
2596
/*
 * Protocol operations table for TCP over IPv4.  It ties together the
 * socket lifecycle handlers defined in this file (init, destroy,
 * backlog_rcv), the generic TCP and inet handlers, the shared accounting
 * counters and sysctl limits, and the timewait/request socket ops.
 */
struct proto tcp_prot = {
	.name = "TCP",
	.owner = THIS_MODULE,
	.close = tcp_close,
	.connect = tcp_v4_connect,
	.disconnect = tcp_disconnect,
	.accept = inet_csk_accept,
	.ioctl = tcp_ioctl,
	.init = tcp_v4_init_sock,
	.destroy = tcp_v4_destroy_sock,
	.shutdown = tcp_shutdown,
	.setsockopt = tcp_setsockopt,
	.getsockopt = tcp_getsockopt,
	.recvmsg = tcp_recvmsg,
	.sendmsg = tcp_sendmsg,
	.sendpage = tcp_sendpage,
	.backlog_rcv = tcp_v4_do_rcv,
	.hash = inet_hash,
	.unhash = inet_unhash,
	.get_port = inet_csk_get_port,
	.enter_memory_pressure = tcp_enter_memory_pressure,
	.sockets_allocated = &tcp_sockets_allocated,
	.orphan_count = &tcp_orphan_count,
	.memory_allocated = &tcp_memory_allocated,
	.memory_pressure = &tcp_memory_pressure,
	.sysctl_mem = sysctl_tcp_mem,
	.sysctl_wmem = sysctl_tcp_wmem,
	.sysctl_rmem = sysctl_tcp_rmem,
	.max_header = MAX_TCP_HEADER,
	.obj_size = sizeof(struct tcp_sock),
	.slab_flags = SLAB_DESTROY_BY_RCU,
	.twsk_prot = &tcp_timewait_sock_ops,
	.rsk_prot = &tcp_request_sock_ops,
	.h.hashinfo = &tcp_hashinfo,
	.no_autobind = true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_tcp_setsockopt,
	.compat_getsockopt = compat_tcp_getsockopt,
#endif
};
EXPORT_SYMBOL(tcp_prot);
2638
2639
/*
 * Per-namespace init: create the control socket stored in
 * net->ipv4.tcp_sock (PF_INET, SOCK_RAW, IPPROTO_TCP).  Returns the
 * result of inet_ctl_sock_create().
 */
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}
2645
/* Per-namespace exit: destroy the control socket created by tcp_sk_init(). */
static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}
2650
/*
 * Batched namespace exit: purge AF_INET timewait sockets from tcp_hashinfo.
 * @net_exit_list is not used; the purge call takes no namespace argument.
 */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}
2655
/* Per-namespace hooks for the TCP control socket and timewait cleanup. */
static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init = tcp_sk_init,
	.exit = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};
2661
2662void __init tcp_v4_init(void)
2663{
2664 inet_hashinfo_init(&tcp_hashinfo);
2665 if (register_pernet_subsys(&tcp_sk_ops))
2666 panic("Failed to create the TCP control socket.\n");
2667}
2668