1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53#define pr_fmt(fmt) "TCP: " fmt
54
55#include <linux/bottom_half.h>
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
64#include <linux/slab.h>
65
66#include <net/net_namespace.h>
67#include <net/icmp.h>
68#include <net/inet_hashtables.h>
69#include <net/tcp.h>
70#include <net/transp_v6.h>
71#include <net/ipv6.h>
72#include <net/inet_common.h>
73#include <net/timewait_sock.h>
74#include <net/xfrm.h>
75#include <net/netdma.h>
76#include <net/secure_seq.h>
77#include <net/tcp_memcontrol.h>
78
79#include <linux/inet.h>
80#include <linux/ipv6.h>
81#include <linux/stddef.h>
82#include <linux/proc_fs.h>
83#include <linux/seq_file.h>
84
85#include <linux/crypto.h>
86#include <linux/scatterlist.h>
87
88int sysctl_tcp_tw_reuse __read_mostly;
89int sysctl_tcp_low_latency __read_mostly;
90EXPORT_SYMBOL(sysctl_tcp_low_latency);
91
92
93#ifdef CONFIG_TCP_MD5SIG
94static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
95 __be32 daddr, __be32 saddr, const struct tcphdr *th);
96#endif
97
98struct inet_hashinfo tcp_hashinfo;
99EXPORT_SYMBOL(tcp_hashinfo);
100
101static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
102{
103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
107}
108
109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110{
111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 struct tcp_sock *tp = tcp_sk(sk);
113
114
115
116
117
118
119
120
121
122
123
124
125 if (tcptw->tw_ts_recent_stamp &&
126 (twp == NULL || (sysctl_tcp_tw_reuse &&
127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 if (tp->write_seq == 0)
130 tp->write_seq = 1;
131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 sock_hold(sktw);
134 return 1;
135 }
136
137 return 0;
138}
139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
141static int tcp_repair_connect(struct sock *sk)
142{
143 tcp_connect_init(sk);
144 tcp_finish_connect(sk, NULL);
145
146 return 0;
147}
148
149
150int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
151{
152 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
153 struct inet_sock *inet = inet_sk(sk);
154 struct tcp_sock *tp = tcp_sk(sk);
155 __be16 orig_sport, orig_dport;
156 __be32 daddr, nexthop;
157 struct flowi4 *fl4;
158 struct rtable *rt;
159 int err;
160 struct ip_options_rcu *inet_opt;
161
162 if (addr_len < sizeof(struct sockaddr_in))
163 return -EINVAL;
164
165 if (usin->sin_family != AF_INET)
166 return -EAFNOSUPPORT;
167
168 nexthop = daddr = usin->sin_addr.s_addr;
169 inet_opt = rcu_dereference_protected(inet->inet_opt,
170 sock_owned_by_user(sk));
171 if (inet_opt && inet_opt->opt.srr) {
172 if (!daddr)
173 return -EINVAL;
174 nexthop = inet_opt->opt.faddr;
175 }
176
177 orig_sport = inet->inet_sport;
178 orig_dport = usin->sin_port;
179 fl4 = &inet->cork.fl.u.ip4;
180 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
181 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
182 IPPROTO_TCP,
183 orig_sport, orig_dport, sk, true);
184 if (IS_ERR(rt)) {
185 err = PTR_ERR(rt);
186 if (err == -ENETUNREACH)
187 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
188 return err;
189 }
190
191 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
192 ip_rt_put(rt);
193 return -ENETUNREACH;
194 }
195
196 if (!inet_opt || !inet_opt->opt.srr)
197 daddr = fl4->daddr;
198
199 if (!inet->inet_saddr)
200 inet->inet_saddr = fl4->saddr;
201 inet->inet_rcv_saddr = inet->inet_saddr;
202
203 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
204
205 tp->rx_opt.ts_recent = 0;
206 tp->rx_opt.ts_recent_stamp = 0;
207 if (likely(!tp->repair))
208 tp->write_seq = 0;
209 }
210
211 if (tcp_death_row.sysctl_tw_recycle &&
212 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
213 tcp_fetch_timewait_stamp(sk, &rt->dst);
214
215 inet->inet_dport = usin->sin_port;
216 inet->inet_daddr = daddr;
217
218 inet_csk(sk)->icsk_ext_hdr_len = 0;
219 if (inet_opt)
220 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
221
222 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
223
224
225
226
227
228
229 tcp_set_state(sk, TCP_SYN_SENT);
230 err = inet_hash_connect(&tcp_death_row, sk);
231 if (err)
232 goto failure;
233
234 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
235 inet->inet_sport, inet->inet_dport, sk);
236 if (IS_ERR(rt)) {
237 err = PTR_ERR(rt);
238 rt = NULL;
239 goto failure;
240 }
241
242 sk->sk_gso_type = SKB_GSO_TCPV4;
243 sk_setup_caps(sk, &rt->dst);
244
245 if (!tp->write_seq && likely(!tp->repair))
246 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
247 inet->inet_daddr,
248 inet->inet_sport,
249 usin->sin_port);
250
251 inet->inet_id = tp->write_seq ^ jiffies;
252
253 if (likely(!tp->repair))
254 err = tcp_connect(sk);
255 else
256 err = tcp_repair_connect(sk);
257
258 rt = NULL;
259 if (err)
260 goto failure;
261
262 return 0;
263
264failure:
265
266
267
268
269 tcp_set_state(sk, TCP_CLOSE);
270 ip_rt_put(rt);
271 sk->sk_route_caps = 0;
272 inet->inet_dport = 0;
273 return err;
274}
275EXPORT_SYMBOL(tcp_v4_connect);
276
277
278
279
280
281
282static void tcp_v4_mtu_reduced(struct sock *sk)
283{
284 struct dst_entry *dst;
285 struct inet_sock *inet = inet_sk(sk);
286 u32 mtu = tcp_sk(sk)->mtu_info;
287
288
289
290
291
292 if (sk->sk_state == TCP_LISTEN)
293 return;
294
295 dst = inet_csk_update_pmtu(sk, mtu);
296 if (!dst)
297 return;
298
299
300
301
302 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
303 sk->sk_err_soft = EMSGSIZE;
304
305 mtu = dst_mtu(dst);
306
307 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
308 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
309 tcp_sync_mss(sk, mtu);
310
311
312
313
314
315
316 tcp_simple_retransmit(sk);
317 }
318}
319
320static void do_redirect(struct sk_buff *skb, struct sock *sk)
321{
322 struct dst_entry *dst = __sk_dst_check(sk, 0);
323
324 if (dst)
325 dst->ops->redirect(dst, sk, skb);
326}
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
345{
346 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
347 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
348 struct inet_connection_sock *icsk;
349 struct tcp_sock *tp;
350 struct inet_sock *inet;
351 const int type = icmp_hdr(icmp_skb)->type;
352 const int code = icmp_hdr(icmp_skb)->code;
353 struct sock *sk;
354 struct sk_buff *skb;
355 __u32 seq;
356 __u32 remaining;
357 int err;
358 struct net *net = dev_net(icmp_skb->dev);
359
360 if (icmp_skb->len < (iph->ihl << 2) + 8) {
361 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
362 return;
363 }
364
365 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
366 iph->saddr, th->source, inet_iif(icmp_skb));
367 if (!sk) {
368 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
369 return;
370 }
371 if (sk->sk_state == TCP_TIME_WAIT) {
372 inet_twsk_put(inet_twsk(sk));
373 return;
374 }
375
376 bh_lock_sock(sk);
377
378
379
380
381
382 if (sock_owned_by_user(sk) &&
383 type != ICMP_DEST_UNREACH &&
384 code != ICMP_FRAG_NEEDED)
385 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
386
387 if (sk->sk_state == TCP_CLOSE)
388 goto out;
389
390 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
391 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
392 goto out;
393 }
394
395 icsk = inet_csk(sk);
396 tp = tcp_sk(sk);
397 seq = ntohl(th->seq);
398 if (sk->sk_state != TCP_LISTEN &&
399 !between(seq, tp->snd_una, tp->snd_nxt)) {
400 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
401 goto out;
402 }
403
404 switch (type) {
405 case ICMP_REDIRECT:
406 do_redirect(icmp_skb, sk);
407 goto out;
408 case ICMP_SOURCE_QUENCH:
409
410 goto out;
411 case ICMP_PARAMETERPROB:
412 err = EPROTO;
413 break;
414 case ICMP_DEST_UNREACH:
415 if (code > NR_ICMP_UNREACH)
416 goto out;
417
418 if (code == ICMP_FRAG_NEEDED) {
419 tp->mtu_info = info;
420 if (!sock_owned_by_user(sk)) {
421 tcp_v4_mtu_reduced(sk);
422 } else {
423 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
424 sock_hold(sk);
425 }
426 goto out;
427 }
428
429 err = icmp_err_convert[code].errno;
430
431
432 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
433 break;
434 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
435 !icsk->icsk_backoff)
436 break;
437
438 if (sock_owned_by_user(sk))
439 break;
440
441 icsk->icsk_backoff--;
442 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
443 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
444 tcp_bound_rto(sk);
445
446 skb = tcp_write_queue_head(sk);
447 BUG_ON(!skb);
448
449 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
450 tcp_time_stamp - TCP_SKB_CB(skb)->when);
451
452 if (remaining) {
453 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
454 remaining, TCP_RTO_MAX);
455 } else {
456
457
458 tcp_retransmit_timer(sk);
459 }
460
461 break;
462 case ICMP_TIME_EXCEEDED:
463 err = EHOSTUNREACH;
464 break;
465 default:
466 goto out;
467 }
468
469 switch (sk->sk_state) {
470 struct request_sock *req, **prev;
471 case TCP_LISTEN:
472 if (sock_owned_by_user(sk))
473 goto out;
474
475 req = inet_csk_search_req(sk, &prev, th->dest,
476 iph->daddr, iph->saddr);
477 if (!req)
478 goto out;
479
480
481
482
483 WARN_ON(req->sk);
484
485 if (seq != tcp_rsk(req)->snt_isn) {
486 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
487 goto out;
488 }
489
490
491
492
493
494
495
496 inet_csk_reqsk_queue_drop(sk, req, prev);
497 goto out;
498
499 case TCP_SYN_SENT:
500 case TCP_SYN_RECV:
501
502
503 if (!sock_owned_by_user(sk)) {
504 sk->sk_err = err;
505
506 sk->sk_error_report(sk);
507
508 tcp_done(sk);
509 } else {
510 sk->sk_err_soft = err;
511 }
512 goto out;
513 }
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531 inet = inet_sk(sk);
532 if (!sock_owned_by_user(sk) && inet->recverr) {
533 sk->sk_err = err;
534 sk->sk_error_report(sk);
535 } else {
536 sk->sk_err_soft = err;
537 }
538
539out:
540 bh_unlock_sock(sk);
541 sock_put(sk);
542}
543
544static void __tcp_v4_send_check(struct sk_buff *skb,
545 __be32 saddr, __be32 daddr)
546{
547 struct tcphdr *th = tcp_hdr(skb);
548
549 if (skb->ip_summed == CHECKSUM_PARTIAL) {
550 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
551 skb->csum_start = skb_transport_header(skb) - skb->head;
552 skb->csum_offset = offsetof(struct tcphdr, check);
553 } else {
554 th->check = tcp_v4_check(skb->len, saddr, daddr,
555 csum_partial(th,
556 th->doff << 2,
557 skb->csum));
558 }
559}
560
561
562void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
563{
564 const struct inet_sock *inet = inet_sk(sk);
565
566 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
567}
568EXPORT_SYMBOL(tcp_v4_send_check);
569
570int tcp_v4_gso_send_check(struct sk_buff *skb)
571{
572 const struct iphdr *iph;
573 struct tcphdr *th;
574
575 if (!pskb_may_pull(skb, sizeof(*th)))
576 return -EINVAL;
577
578 iph = ip_hdr(skb);
579 th = tcp_hdr(skb);
580
581 th->check = 0;
582 skb->ip_summed = CHECKSUM_PARTIAL;
583 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
584 return 0;
585}
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
601{
602 const struct tcphdr *th = tcp_hdr(skb);
603 struct {
604 struct tcphdr th;
605#ifdef CONFIG_TCP_MD5SIG
606 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
607#endif
608 } rep;
609 struct ip_reply_arg arg;
610#ifdef CONFIG_TCP_MD5SIG
611 struct tcp_md5sig_key *key;
612 const __u8 *hash_location = NULL;
613 unsigned char newhash[16];
614 int genhash;
615 struct sock *sk1 = NULL;
616#endif
617 struct net *net;
618
619
620 if (th->rst)
621 return;
622
623 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
624 return;
625
626
627 memset(&rep, 0, sizeof(rep));
628 rep.th.dest = th->source;
629 rep.th.source = th->dest;
630 rep.th.doff = sizeof(struct tcphdr) / 4;
631 rep.th.rst = 1;
632
633 if (th->ack) {
634 rep.th.seq = th->ack_seq;
635 } else {
636 rep.th.ack = 1;
637 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
638 skb->len - (th->doff << 2));
639 }
640
641 memset(&arg, 0, sizeof(arg));
642 arg.iov[0].iov_base = (unsigned char *)&rep;
643 arg.iov[0].iov_len = sizeof(rep.th);
644
645#ifdef CONFIG_TCP_MD5SIG
646 hash_location = tcp_parse_md5sig_option(th);
647 if (!sk && hash_location) {
648
649
650
651
652
653
654
655 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
656 &tcp_hashinfo, ip_hdr(skb)->daddr,
657 ntohs(th->source), inet_iif(skb));
658
659 if (!sk1)
660 return;
661 rcu_read_lock();
662 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
663 &ip_hdr(skb)->saddr, AF_INET);
664 if (!key)
665 goto release_sk1;
666
667 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
668 if (genhash || memcmp(hash_location, newhash, 16) != 0)
669 goto release_sk1;
670 } else {
671 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
672 &ip_hdr(skb)->saddr,
673 AF_INET) : NULL;
674 }
675
676 if (key) {
677 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
678 (TCPOPT_NOP << 16) |
679 (TCPOPT_MD5SIG << 8) |
680 TCPOLEN_MD5SIG);
681
682 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
683 rep.th.doff = arg.iov[0].iov_len / 4;
684
685 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
686 key, ip_hdr(skb)->saddr,
687 ip_hdr(skb)->daddr, &rep.th);
688 }
689#endif
690 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
691 ip_hdr(skb)->saddr,
692 arg.iov[0].iov_len, IPPROTO_TCP, 0);
693 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
694 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
695
696
697
698
699 if (sk)
700 arg.bound_dev_if = sk->sk_bound_dev_if;
701
702 net = dev_net(skb_dst(skb)->dev);
703 arg.tos = ip_hdr(skb)->tos;
704 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
705 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
706
707 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
708 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
709
710#ifdef CONFIG_TCP_MD5SIG
711release_sk1:
712 if (sk1) {
713 rcu_read_unlock();
714 sock_put(sk1);
715 }
716#endif
717}
718
719
720
721
722
723static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
724 u32 win, u32 ts, int oif,
725 struct tcp_md5sig_key *key,
726 int reply_flags, u8 tos)
727{
728 const struct tcphdr *th = tcp_hdr(skb);
729 struct {
730 struct tcphdr th;
731 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
732#ifdef CONFIG_TCP_MD5SIG
733 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
734#endif
735 ];
736 } rep;
737 struct ip_reply_arg arg;
738 struct net *net = dev_net(skb_dst(skb)->dev);
739
740 memset(&rep.th, 0, sizeof(struct tcphdr));
741 memset(&arg, 0, sizeof(arg));
742
743 arg.iov[0].iov_base = (unsigned char *)&rep;
744 arg.iov[0].iov_len = sizeof(rep.th);
745 if (ts) {
746 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
747 (TCPOPT_TIMESTAMP << 8) |
748 TCPOLEN_TIMESTAMP);
749 rep.opt[1] = htonl(tcp_time_stamp);
750 rep.opt[2] = htonl(ts);
751 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
752 }
753
754
755 rep.th.dest = th->source;
756 rep.th.source = th->dest;
757 rep.th.doff = arg.iov[0].iov_len / 4;
758 rep.th.seq = htonl(seq);
759 rep.th.ack_seq = htonl(ack);
760 rep.th.ack = 1;
761 rep.th.window = htons(win);
762
763#ifdef CONFIG_TCP_MD5SIG
764 if (key) {
765 int offset = (ts) ? 3 : 0;
766
767 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
768 (TCPOPT_NOP << 16) |
769 (TCPOPT_MD5SIG << 8) |
770 TCPOLEN_MD5SIG);
771 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
772 rep.th.doff = arg.iov[0].iov_len/4;
773
774 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
775 key, ip_hdr(skb)->saddr,
776 ip_hdr(skb)->daddr, &rep.th);
777 }
778#endif
779 arg.flags = reply_flags;
780 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
781 ip_hdr(skb)->saddr,
782 arg.iov[0].iov_len, IPPROTO_TCP, 0);
783 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
784 if (oif)
785 arg.bound_dev_if = oif;
786 arg.tos = tos;
787 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
788 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
789
790 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
791}
792
793static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
794{
795 struct inet_timewait_sock *tw = inet_twsk(sk);
796 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
797
798 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
799 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
800 tcptw->tw_ts_recent,
801 tw->tw_bound_dev_if,
802 tcp_twsk_md5_key(tcptw),
803 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
804 tw->tw_tos
805 );
806
807 inet_twsk_put(tw);
808}
809
810static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
811 struct request_sock *req)
812{
813 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
814 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
815 req->ts_recent,
816 0,
817 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
818 AF_INET),
819 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
820 ip_hdr(skb)->tos);
821}
822
823
824
825
826
827
828static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
829 struct request_sock *req,
830 struct request_values *rvp,
831 u16 queue_mapping,
832 bool nocache)
833{
834 const struct inet_request_sock *ireq = inet_rsk(req);
835 struct flowi4 fl4;
836 int err = -1;
837 struct sk_buff * skb;
838
839
840 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
841 return -1;
842
843 skb = tcp_make_synack(sk, dst, req, rvp);
844
845 if (skb) {
846 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
847
848 skb_set_queue_mapping(skb, queue_mapping);
849 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
850 ireq->rmt_addr,
851 ireq->opt);
852 err = net_xmit_eval(err);
853 }
854
855 return err;
856}
857
858static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
859 struct request_values *rvp)
860{
861 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
862 return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
863}
864
865
866
867
868static void tcp_v4_reqsk_destructor(struct request_sock *req)
869{
870 kfree(inet_rsk(req)->opt);
871}
872
873
874
875
876bool tcp_syn_flood_action(struct sock *sk,
877 const struct sk_buff *skb,
878 const char *proto)
879{
880 const char *msg = "Dropping request";
881 bool want_cookie = false;
882 struct listen_sock *lopt;
883
884
885
886#ifdef CONFIG_SYN_COOKIES
887 if (sysctl_tcp_syncookies) {
888 msg = "Sending cookies";
889 want_cookie = true;
890 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
891 } else
892#endif
893 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
894
895 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
896 if (!lopt->synflood_warned) {
897 lopt->synflood_warned = 1;
898 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
899 proto, ntohs(tcp_hdr(skb)->dest), msg);
900 }
901 return want_cookie;
902}
903EXPORT_SYMBOL(tcp_syn_flood_action);
904
905
906
907
908static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
909 struct sk_buff *skb)
910{
911 const struct ip_options *opt = &(IPCB(skb)->opt);
912 struct ip_options_rcu *dopt = NULL;
913
914 if (opt && opt->optlen) {
915 int opt_size = sizeof(*dopt) + opt->optlen;
916
917 dopt = kmalloc(opt_size, GFP_ATOMIC);
918 if (dopt) {
919 if (ip_options_echo(&dopt->opt, skb)) {
920 kfree(dopt);
921 dopt = NULL;
922 }
923 }
924 }
925 return dopt;
926}
927
928#ifdef CONFIG_TCP_MD5SIG
929
930
931
932
933
934
935
936struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
937 const union tcp_md5_addr *addr,
938 int family)
939{
940 struct tcp_sock *tp = tcp_sk(sk);
941 struct tcp_md5sig_key *key;
942 struct hlist_node *pos;
943 unsigned int size = sizeof(struct in_addr);
944 struct tcp_md5sig_info *md5sig;
945
946
947 md5sig = rcu_dereference_check(tp->md5sig_info,
948 sock_owned_by_user(sk) ||
949 lockdep_is_held(&sk->sk_lock.slock));
950 if (!md5sig)
951 return NULL;
952#if IS_ENABLED(CONFIG_IPV6)
953 if (family == AF_INET6)
954 size = sizeof(struct in6_addr);
955#endif
956 hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) {
957 if (key->family != family)
958 continue;
959 if (!memcmp(&key->addr, addr, size))
960 return key;
961 }
962 return NULL;
963}
964EXPORT_SYMBOL(tcp_md5_do_lookup);
965
966struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
967 struct sock *addr_sk)
968{
969 union tcp_md5_addr *addr;
970
971 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
972 return tcp_md5_do_lookup(sk, addr, AF_INET);
973}
974EXPORT_SYMBOL(tcp_v4_md5_lookup);
975
976static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
977 struct request_sock *req)
978{
979 union tcp_md5_addr *addr;
980
981 addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
982 return tcp_md5_do_lookup(sk, addr, AF_INET);
983}
984
985
986int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
987 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
988{
989
990 struct tcp_md5sig_key *key;
991 struct tcp_sock *tp = tcp_sk(sk);
992 struct tcp_md5sig_info *md5sig;
993
994 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
995 if (key) {
996
997 memcpy(key->key, newkey, newkeylen);
998 key->keylen = newkeylen;
999 return 0;
1000 }
1001
1002 md5sig = rcu_dereference_protected(tp->md5sig_info,
1003 sock_owned_by_user(sk));
1004 if (!md5sig) {
1005 md5sig = kmalloc(sizeof(*md5sig), gfp);
1006 if (!md5sig)
1007 return -ENOMEM;
1008
1009 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1010 INIT_HLIST_HEAD(&md5sig->head);
1011 rcu_assign_pointer(tp->md5sig_info, md5sig);
1012 }
1013
1014 key = sock_kmalloc(sk, sizeof(*key), gfp);
1015 if (!key)
1016 return -ENOMEM;
1017 if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
1018 sock_kfree_s(sk, key, sizeof(*key));
1019 return -ENOMEM;
1020 }
1021
1022 memcpy(key->key, newkey, newkeylen);
1023 key->keylen = newkeylen;
1024 key->family = family;
1025 memcpy(&key->addr, addr,
1026 (family == AF_INET6) ? sizeof(struct in6_addr) :
1027 sizeof(struct in_addr));
1028 hlist_add_head_rcu(&key->node, &md5sig->head);
1029 return 0;
1030}
1031EXPORT_SYMBOL(tcp_md5_do_add);
1032
1033int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
1034{
1035 struct tcp_sock *tp = tcp_sk(sk);
1036 struct tcp_md5sig_key *key;
1037 struct tcp_md5sig_info *md5sig;
1038
1039 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET);
1040 if (!key)
1041 return -ENOENT;
1042 hlist_del_rcu(&key->node);
1043 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1044 kfree_rcu(key, rcu);
1045 md5sig = rcu_dereference_protected(tp->md5sig_info,
1046 sock_owned_by_user(sk));
1047 if (hlist_empty(&md5sig->head))
1048 tcp_free_md5sig_pool();
1049 return 0;
1050}
1051EXPORT_SYMBOL(tcp_md5_do_del);
1052
1053void tcp_clear_md5_list(struct sock *sk)
1054{
1055 struct tcp_sock *tp = tcp_sk(sk);
1056 struct tcp_md5sig_key *key;
1057 struct hlist_node *pos, *n;
1058 struct tcp_md5sig_info *md5sig;
1059
1060 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1061
1062 if (!hlist_empty(&md5sig->head))
1063 tcp_free_md5sig_pool();
1064 hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
1065 hlist_del_rcu(&key->node);
1066 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
1067 kfree_rcu(key, rcu);
1068 }
1069}
1070
1071static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1072 int optlen)
1073{
1074 struct tcp_md5sig cmd;
1075 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1076
1077 if (optlen < sizeof(cmd))
1078 return -EINVAL;
1079
1080 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1081 return -EFAULT;
1082
1083 if (sin->sin_family != AF_INET)
1084 return -EINVAL;
1085
1086 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
1087 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1088 AF_INET);
1089
1090 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1091 return -EINVAL;
1092
1093 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1094 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1095 GFP_KERNEL);
1096}
1097
1098static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1099 __be32 daddr, __be32 saddr, int nbytes)
1100{
1101 struct tcp4_pseudohdr *bp;
1102 struct scatterlist sg;
1103
1104 bp = &hp->md5_blk.ip4;
1105
1106
1107
1108
1109
1110
1111 bp->saddr = saddr;
1112 bp->daddr = daddr;
1113 bp->pad = 0;
1114 bp->protocol = IPPROTO_TCP;
1115 bp->len = cpu_to_be16(nbytes);
1116
1117 sg_init_one(&sg, bp, sizeof(*bp));
1118 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1119}
1120
1121static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1122 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1123{
1124 struct tcp_md5sig_pool *hp;
1125 struct hash_desc *desc;
1126
1127 hp = tcp_get_md5sig_pool();
1128 if (!hp)
1129 goto clear_hash_noput;
1130 desc = &hp->md5_desc;
1131
1132 if (crypto_hash_init(desc))
1133 goto clear_hash;
1134 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1135 goto clear_hash;
1136 if (tcp_md5_hash_header(hp, th))
1137 goto clear_hash;
1138 if (tcp_md5_hash_key(hp, key))
1139 goto clear_hash;
1140 if (crypto_hash_final(desc, md5_hash))
1141 goto clear_hash;
1142
1143 tcp_put_md5sig_pool();
1144 return 0;
1145
1146clear_hash:
1147 tcp_put_md5sig_pool();
1148clear_hash_noput:
1149 memset(md5_hash, 0, 16);
1150 return 1;
1151}
1152
1153int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1154 const struct sock *sk, const struct request_sock *req,
1155 const struct sk_buff *skb)
1156{
1157 struct tcp_md5sig_pool *hp;
1158 struct hash_desc *desc;
1159 const struct tcphdr *th = tcp_hdr(skb);
1160 __be32 saddr, daddr;
1161
1162 if (sk) {
1163 saddr = inet_sk(sk)->inet_saddr;
1164 daddr = inet_sk(sk)->inet_daddr;
1165 } else if (req) {
1166 saddr = inet_rsk(req)->loc_addr;
1167 daddr = inet_rsk(req)->rmt_addr;
1168 } else {
1169 const struct iphdr *iph = ip_hdr(skb);
1170 saddr = iph->saddr;
1171 daddr = iph->daddr;
1172 }
1173
1174 hp = tcp_get_md5sig_pool();
1175 if (!hp)
1176 goto clear_hash_noput;
1177 desc = &hp->md5_desc;
1178
1179 if (crypto_hash_init(desc))
1180 goto clear_hash;
1181
1182 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1183 goto clear_hash;
1184 if (tcp_md5_hash_header(hp, th))
1185 goto clear_hash;
1186 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1187 goto clear_hash;
1188 if (tcp_md5_hash_key(hp, key))
1189 goto clear_hash;
1190 if (crypto_hash_final(desc, md5_hash))
1191 goto clear_hash;
1192
1193 tcp_put_md5sig_pool();
1194 return 0;
1195
1196clear_hash:
1197 tcp_put_md5sig_pool();
1198clear_hash_noput:
1199 memset(md5_hash, 0, 16);
1200 return 1;
1201}
1202EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1203
1204static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1205{
1206
1207
1208
1209
1210
1211
1212
1213
1214 const __u8 *hash_location = NULL;
1215 struct tcp_md5sig_key *hash_expected;
1216 const struct iphdr *iph = ip_hdr(skb);
1217 const struct tcphdr *th = tcp_hdr(skb);
1218 int genhash;
1219 unsigned char newhash[16];
1220
1221 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1222 AF_INET);
1223 hash_location = tcp_parse_md5sig_option(th);
1224
1225
1226 if (!hash_expected && !hash_location)
1227 return false;
1228
1229 if (hash_expected && !hash_location) {
1230 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1231 return true;
1232 }
1233
1234 if (!hash_expected && hash_location) {
1235 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1236 return true;
1237 }
1238
1239
1240
1241
1242 genhash = tcp_v4_md5_hash_skb(newhash,
1243 hash_expected,
1244 NULL, NULL, skb);
1245
1246 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1247 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1248 &iph->saddr, ntohs(th->source),
1249 &iph->daddr, ntohs(th->dest),
1250 genhash ? " tcp_v4_calc_md5_hash failed"
1251 : "");
1252 return true;
1253 }
1254 return false;
1255}
1256
1257#endif
1258
1259struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1260 .family = PF_INET,
1261 .obj_size = sizeof(struct tcp_request_sock),
1262 .rtx_syn_ack = tcp_v4_rtx_synack,
1263 .send_ack = tcp_v4_reqsk_send_ack,
1264 .destructor = tcp_v4_reqsk_destructor,
1265 .send_reset = tcp_v4_send_reset,
1266 .syn_ack_timeout = tcp_syn_ack_timeout,
1267};
1268
1269#ifdef CONFIG_TCP_MD5SIG
1270static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1271 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1272 .calc_md5_hash = tcp_v4_md5_hash_skb,
1273};
1274#endif
1275
1276int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1277{
1278 struct tcp_extend_values tmp_ext;
1279 struct tcp_options_received tmp_opt;
1280 const u8 *hash_location;
1281 struct request_sock *req;
1282 struct inet_request_sock *ireq;
1283 struct tcp_sock *tp = tcp_sk(sk);
1284 struct dst_entry *dst = NULL;
1285 __be32 saddr = ip_hdr(skb)->saddr;
1286 __be32 daddr = ip_hdr(skb)->daddr;
1287 __u32 isn = TCP_SKB_CB(skb)->when;
1288 bool want_cookie = false;
1289
1290
1291 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1292 goto drop;
1293
1294
1295
1296
1297
1298 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1299 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1300 if (!want_cookie)
1301 goto drop;
1302 }
1303
1304
1305
1306
1307
1308
1309 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1310 goto drop;
1311
1312 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1313 if (!req)
1314 goto drop;
1315
1316#ifdef CONFIG_TCP_MD5SIG
1317 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1318#endif
1319
1320 tcp_clear_options(&tmp_opt);
1321 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1322 tmp_opt.user_mss = tp->rx_opt.user_mss;
1323 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
1324
1325 if (tmp_opt.cookie_plus > 0 &&
1326 tmp_opt.saw_tstamp &&
1327 !tp->rx_opt.cookie_out_never &&
1328 (sysctl_tcp_cookie_size > 0 ||
1329 (tp->cookie_values != NULL &&
1330 tp->cookie_values->cookie_desired > 0))) {
1331 u8 *c;
1332 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1333 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1334
1335 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1336 goto drop_and_release;
1337
1338
1339 *mess++ ^= (__force u32)daddr;
1340 *mess++ ^= (__force u32)saddr;
1341
1342
1343 c = (u8 *)mess;
1344 while (l-- > 0)
1345 *c++ ^= *hash_location++;
1346
1347 want_cookie = false;
1348 tmp_ext.cookie_out_never = 0;
1349 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1350 } else if (!tp->rx_opt.cookie_in_always) {
1351
1352 tmp_ext.cookie_out_never = 1;
1353 tmp_ext.cookie_plus = 0;
1354 } else {
1355 goto drop_and_release;
1356 }
1357 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1358
1359 if (want_cookie && !tmp_opt.saw_tstamp)
1360 tcp_clear_options(&tmp_opt);
1361
1362 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1363 tcp_openreq_init(req, &tmp_opt, skb);
1364
1365 ireq = inet_rsk(req);
1366 ireq->loc_addr = daddr;
1367 ireq->rmt_addr = saddr;
1368 ireq->no_srccheck = inet_sk(sk)->transparent;
1369 ireq->opt = tcp_v4_save_options(sk, skb);
1370
1371 if (security_inet_conn_request(sk, skb, req))
1372 goto drop_and_free;
1373
1374 if (!want_cookie || tmp_opt.tstamp_ok)
1375 TCP_ECN_create_request(req, skb);
1376
1377 if (want_cookie) {
1378 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1379 req->cookie_ts = tmp_opt.tstamp_ok;
1380 } else if (!isn) {
1381 struct flowi4 fl4;
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392 if (tmp_opt.saw_tstamp &&
1393 tcp_death_row.sysctl_tw_recycle &&
1394 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1395 fl4.daddr == saddr) {
1396 if (!tcp_peer_is_proven(req, dst, true)) {
1397 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1398 goto drop_and_release;
1399 }
1400 }
1401
1402 else if (!sysctl_tcp_syncookies &&
1403 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1404 (sysctl_max_syn_backlog >> 2)) &&
1405 !tcp_peer_is_proven(req, dst, false)) {
1406
1407
1408
1409
1410
1411
1412
1413 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
1414 &saddr, ntohs(tcp_hdr(skb)->source));
1415 goto drop_and_release;
1416 }
1417
1418 isn = tcp_v4_init_sequence(skb);
1419 }
1420 tcp_rsk(req)->snt_isn = isn;
1421 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1422
1423 if (tcp_v4_send_synack(sk, dst, req,
1424 (struct request_values *)&tmp_ext,
1425 skb_get_queue_mapping(skb),
1426 want_cookie) ||
1427 want_cookie)
1428 goto drop_and_free;
1429
1430 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1431 return 0;
1432
1433drop_and_release:
1434 dst_release(dst);
1435drop_and_free:
1436 reqsk_free(req);
1437drop:
1438 return 0;
1439}
1440EXPORT_SYMBOL(tcp_v4_conn_request);
1441
1442
1443
1444
1445
1446
1447struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1448 struct request_sock *req,
1449 struct dst_entry *dst)
1450{
1451 struct inet_request_sock *ireq;
1452 struct inet_sock *newinet;
1453 struct tcp_sock *newtp;
1454 struct sock *newsk;
1455#ifdef CONFIG_TCP_MD5SIG
1456 struct tcp_md5sig_key *key;
1457#endif
1458 struct ip_options_rcu *inet_opt;
1459
1460 if (sk_acceptq_is_full(sk))
1461 goto exit_overflow;
1462
1463 newsk = tcp_create_openreq_child(sk, req, skb);
1464 if (!newsk)
1465 goto exit_nonewsk;
1466
1467 newsk->sk_gso_type = SKB_GSO_TCPV4;
1468 inet_sk_rx_dst_set(newsk, skb);
1469
1470 newtp = tcp_sk(newsk);
1471 newinet = inet_sk(newsk);
1472 ireq = inet_rsk(req);
1473 newinet->inet_daddr = ireq->rmt_addr;
1474 newinet->inet_rcv_saddr = ireq->loc_addr;
1475 newinet->inet_saddr = ireq->loc_addr;
1476 inet_opt = ireq->opt;
1477 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1478 ireq->opt = NULL;
1479 newinet->mc_index = inet_iif(skb);
1480 newinet->mc_ttl = ip_hdr(skb)->ttl;
1481 newinet->rcv_tos = ip_hdr(skb)->tos;
1482 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1483 if (inet_opt)
1484 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1485 newinet->inet_id = newtp->write_seq ^ jiffies;
1486
1487 if (!dst) {
1488 dst = inet_csk_route_child_sock(sk, newsk, req);
1489 if (!dst)
1490 goto put_and_exit;
1491 } else {
1492
1493 }
1494 sk_setup_caps(newsk, dst);
1495
1496 tcp_mtup_init(newsk);
1497 tcp_sync_mss(newsk, dst_mtu(dst));
1498 newtp->advmss = dst_metric_advmss(dst);
1499 if (tcp_sk(sk)->rx_opt.user_mss &&
1500 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1501 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1502
1503 tcp_initialize_rcv_mss(newsk);
1504 if (tcp_rsk(req)->snt_synack)
1505 tcp_valid_rtt_meas(newsk,
1506 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1507 newtp->total_retrans = req->retrans;
1508
1509#ifdef CONFIG_TCP_MD5SIG
1510
1511 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1512 AF_INET);
1513 if (key != NULL) {
1514
1515
1516
1517
1518
1519
1520 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1521 AF_INET, key->key, key->keylen, GFP_ATOMIC);
1522 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1523 }
1524#endif
1525
1526 if (__inet_inherit_port(sk, newsk) < 0)
1527 goto put_and_exit;
1528 __inet_hash_nolisten(newsk, NULL);
1529
1530 return newsk;
1531
1532exit_overflow:
1533 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1534exit_nonewsk:
1535 dst_release(dst);
1536exit:
1537 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1538 return NULL;
1539put_and_exit:
1540 tcp_clear_xmit_timers(newsk);
1541 tcp_cleanup_congestion_control(newsk);
1542 bh_unlock_sock(newsk);
1543 sock_put(newsk);
1544 goto exit;
1545}
1546EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1547
1548static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1549{
1550 struct tcphdr *th = tcp_hdr(skb);
1551 const struct iphdr *iph = ip_hdr(skb);
1552 struct sock *nsk;
1553 struct request_sock **prev;
1554
1555 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1556 iph->saddr, iph->daddr);
1557 if (req)
1558 return tcp_check_req(sk, skb, req, prev);
1559
1560 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1561 th->source, iph->daddr, th->dest, inet_iif(skb));
1562
1563 if (nsk) {
1564 if (nsk->sk_state != TCP_TIME_WAIT) {
1565 bh_lock_sock(nsk);
1566 return nsk;
1567 }
1568 inet_twsk_put(inet_twsk(nsk));
1569 return NULL;
1570 }
1571
1572#ifdef CONFIG_SYN_COOKIES
1573 if (!th->syn)
1574 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1575#endif
1576 return sk;
1577}
1578
1579static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1580{
1581 const struct iphdr *iph = ip_hdr(skb);
1582
1583 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1584 if (!tcp_v4_check(skb->len, iph->saddr,
1585 iph->daddr, skb->csum)) {
1586 skb->ip_summed = CHECKSUM_UNNECESSARY;
1587 return 0;
1588 }
1589 }
1590
1591 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1592 skb->len, IPPROTO_TCP, 0);
1593
1594 if (skb->len <= 76) {
1595 return __skb_checksum_complete(skb);
1596 }
1597 return 0;
1598}
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1610{
1611 struct sock *rsk;
1612#ifdef CONFIG_TCP_MD5SIG
1613
1614
1615
1616
1617
1618
1619 if (tcp_v4_inbound_md5_hash(sk, skb))
1620 goto discard;
1621#endif
1622
1623 if (sk->sk_state == TCP_ESTABLISHED) {
1624 struct dst_entry *dst = sk->sk_rx_dst;
1625
1626 sock_rps_save_rxhash(sk, skb);
1627 if (dst) {
1628 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1629 dst->ops->check(dst, 0) == NULL) {
1630 dst_release(dst);
1631 sk->sk_rx_dst = NULL;
1632 }
1633 }
1634 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1635 rsk = sk;
1636 goto reset;
1637 }
1638 return 0;
1639 }
1640
1641 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1642 goto csum_err;
1643
1644 if (sk->sk_state == TCP_LISTEN) {
1645 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1646 if (!nsk)
1647 goto discard;
1648
1649 if (nsk != sk) {
1650 sock_rps_save_rxhash(nsk, skb);
1651 if (tcp_child_process(sk, nsk, skb)) {
1652 rsk = nsk;
1653 goto reset;
1654 }
1655 return 0;
1656 }
1657 } else
1658 sock_rps_save_rxhash(sk, skb);
1659
1660 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1661 rsk = sk;
1662 goto reset;
1663 }
1664 return 0;
1665
1666reset:
1667 tcp_v4_send_reset(rsk, skb);
1668discard:
1669 kfree_skb(skb);
1670
1671
1672
1673
1674
1675 return 0;
1676
1677csum_err:
1678 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1679 goto discard;
1680}
1681EXPORT_SYMBOL(tcp_v4_do_rcv);
1682
1683void tcp_v4_early_demux(struct sk_buff *skb)
1684{
1685 struct net *net = dev_net(skb->dev);
1686 const struct iphdr *iph;
1687 const struct tcphdr *th;
1688 struct sock *sk;
1689
1690 if (skb->pkt_type != PACKET_HOST)
1691 return;
1692
1693 if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr)))
1694 return;
1695
1696 iph = ip_hdr(skb);
1697 th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb));
1698
1699 if (th->doff < sizeof(struct tcphdr) / 4)
1700 return;
1701
1702 sk = __inet_lookup_established(net, &tcp_hashinfo,
1703 iph->saddr, th->source,
1704 iph->daddr, ntohs(th->dest),
1705 skb->skb_iif);
1706 if (sk) {
1707 skb->sk = sk;
1708 skb->destructor = sock_edemux;
1709 if (sk->sk_state != TCP_TIME_WAIT) {
1710 struct dst_entry *dst = sk->sk_rx_dst;
1711
1712 if (dst)
1713 dst = dst_check(dst, 0);
1714 if (dst &&
1715 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1716 skb_dst_set_noref(skb, dst);
1717 }
1718 }
1719}
1720
1721
1722
1723
1724
1725int tcp_v4_rcv(struct sk_buff *skb)
1726{
1727 const struct iphdr *iph;
1728 const struct tcphdr *th;
1729 struct sock *sk;
1730 int ret;
1731 struct net *net = dev_net(skb->dev);
1732
1733 if (skb->pkt_type != PACKET_HOST)
1734 goto discard_it;
1735
1736
1737 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1738
1739 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1740 goto discard_it;
1741
1742 th = tcp_hdr(skb);
1743
1744 if (th->doff < sizeof(struct tcphdr) / 4)
1745 goto bad_packet;
1746 if (!pskb_may_pull(skb, th->doff * 4))
1747 goto discard_it;
1748
1749
1750
1751
1752
1753 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1754 goto bad_packet;
1755
1756 th = tcp_hdr(skb);
1757 iph = ip_hdr(skb);
1758 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1759 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1760 skb->len - th->doff * 4);
1761 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1762 TCP_SKB_CB(skb)->when = 0;
1763 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1764 TCP_SKB_CB(skb)->sacked = 0;
1765
1766 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1767 if (!sk)
1768 goto no_tcp_socket;
1769
1770process:
1771 if (sk->sk_state == TCP_TIME_WAIT)
1772 goto do_time_wait;
1773
1774 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1775 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1776 goto discard_and_relse;
1777 }
1778
1779 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1780 goto discard_and_relse;
1781 nf_reset(skb);
1782
1783 if (sk_filter(sk, skb))
1784 goto discard_and_relse;
1785
1786 skb->dev = NULL;
1787
1788 bh_lock_sock_nested(sk);
1789 ret = 0;
1790 if (!sock_owned_by_user(sk)) {
1791#ifdef CONFIG_NET_DMA
1792 struct tcp_sock *tp = tcp_sk(sk);
1793 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1794 tp->ucopy.dma_chan = net_dma_find_channel();
1795 if (tp->ucopy.dma_chan)
1796 ret = tcp_v4_do_rcv(sk, skb);
1797 else
1798#endif
1799 {
1800 if (!tcp_prequeue(sk, skb))
1801 ret = tcp_v4_do_rcv(sk, skb);
1802 }
1803 } else if (unlikely(sk_add_backlog(sk, skb,
1804 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1805 bh_unlock_sock(sk);
1806 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1807 goto discard_and_relse;
1808 }
1809 bh_unlock_sock(sk);
1810
1811 sock_put(sk);
1812
1813 return ret;
1814
1815no_tcp_socket:
1816 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1817 goto discard_it;
1818
1819 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1820bad_packet:
1821 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1822 } else {
1823 tcp_v4_send_reset(NULL, skb);
1824 }
1825
1826discard_it:
1827
1828 kfree_skb(skb);
1829 return 0;
1830
1831discard_and_relse:
1832 sock_put(sk);
1833 goto discard_it;
1834
1835do_time_wait:
1836 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1837 inet_twsk_put(inet_twsk(sk));
1838 goto discard_it;
1839 }
1840
1841 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1842 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1843 inet_twsk_put(inet_twsk(sk));
1844 goto discard_it;
1845 }
1846 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1847 case TCP_TW_SYN: {
1848 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1849 &tcp_hashinfo,
1850 iph->daddr, th->dest,
1851 inet_iif(skb));
1852 if (sk2) {
1853 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1854 inet_twsk_put(inet_twsk(sk));
1855 sk = sk2;
1856 goto process;
1857 }
1858
1859 }
1860 case TCP_TW_ACK:
1861 tcp_v4_timewait_ack(sk, skb);
1862 break;
1863 case TCP_TW_RST:
1864 goto no_tcp_socket;
1865 case TCP_TW_SUCCESS:;
1866 }
1867 goto discard_it;
1868}
1869
1870static struct timewait_sock_ops tcp_timewait_sock_ops = {
1871 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1872 .twsk_unique = tcp_twsk_unique,
1873 .twsk_destructor= tcp_twsk_destructor,
1874};
1875
1876void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1877{
1878 struct dst_entry *dst = skb_dst(skb);
1879
1880 dst_hold(dst);
1881 sk->sk_rx_dst = dst;
1882 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1883}
1884EXPORT_SYMBOL(inet_sk_rx_dst_set);
1885
1886const struct inet_connection_sock_af_ops ipv4_specific = {
1887 .queue_xmit = ip_queue_xmit,
1888 .send_check = tcp_v4_send_check,
1889 .rebuild_header = inet_sk_rebuild_header,
1890 .sk_rx_dst_set = inet_sk_rx_dst_set,
1891 .conn_request = tcp_v4_conn_request,
1892 .syn_recv_sock = tcp_v4_syn_recv_sock,
1893 .net_header_len = sizeof(struct iphdr),
1894 .setsockopt = ip_setsockopt,
1895 .getsockopt = ip_getsockopt,
1896 .addr2sockaddr = inet_csk_addr2sockaddr,
1897 .sockaddr_len = sizeof(struct sockaddr_in),
1898 .bind_conflict = inet_csk_bind_conflict,
1899#ifdef CONFIG_COMPAT
1900 .compat_setsockopt = compat_ip_setsockopt,
1901 .compat_getsockopt = compat_ip_getsockopt,
1902#endif
1903};
1904EXPORT_SYMBOL(ipv4_specific);
1905
#ifdef CONFIG_TCP_MD5SIG
/* IPv4 MD5 signature hooks; installed on each socket by tcp_v4_init_sock(). */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif
1913
1914
1915
1916
1917static int tcp_v4_init_sock(struct sock *sk)
1918{
1919 struct inet_connection_sock *icsk = inet_csk(sk);
1920
1921 tcp_init_sock(sk);
1922
1923 icsk->icsk_af_ops = &ipv4_specific;
1924
1925#ifdef CONFIG_TCP_MD5SIG
1926 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1927#endif
1928
1929 return 0;
1930}
1931
1932void tcp_v4_destroy_sock(struct sock *sk)
1933{
1934 struct tcp_sock *tp = tcp_sk(sk);
1935
1936 tcp_clear_xmit_timers(sk);
1937
1938 tcp_cleanup_congestion_control(sk);
1939
1940
1941 tcp_write_queue_purge(sk);
1942
1943
1944 __skb_queue_purge(&tp->out_of_order_queue);
1945
1946#ifdef CONFIG_TCP_MD5SIG
1947
1948 if (tp->md5sig_info) {
1949 tcp_clear_md5_list(sk);
1950 kfree_rcu(tp->md5sig_info, rcu);
1951 tp->md5sig_info = NULL;
1952 }
1953#endif
1954
1955#ifdef CONFIG_NET_DMA
1956
1957 __skb_queue_purge(&sk->sk_async_wait_queue);
1958#endif
1959
1960
1961 __skb_queue_purge(&tp->ucopy.prequeue);
1962
1963
1964 if (inet_csk(sk)->icsk_bind_hash)
1965 inet_put_port(sk);
1966
1967
1968
1969
1970 if (sk->sk_sndmsg_page) {
1971 __free_page(sk->sk_sndmsg_page);
1972 sk->sk_sndmsg_page = NULL;
1973 }
1974
1975
1976 if (tp->cookie_values != NULL) {
1977 kref_put(&tp->cookie_values->kref,
1978 tcp_cookie_values_release);
1979 tp->cookie_values = NULL;
1980 }
1981
1982
1983 tcp_free_fastopen_req(tp);
1984
1985 sk_sockets_allocated_dec(sk);
1986 sock_release_memcg(sk);
1987}
1988EXPORT_SYMBOL(tcp_v4_destroy_sock);
1989
1990#ifdef CONFIG_PROC_FS
1991
1992
1993static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1994{
1995 return hlist_nulls_empty(head) ? NULL :
1996 list_entry(head->first, struct inet_timewait_sock, tw_node);
1997}
1998
1999static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2000{
2001 return !is_a_nulls(tw->tw_node.next) ?
2002 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2003}
2004
2005
2006
2007
2008
2009
2010static void *listening_get_next(struct seq_file *seq, void *cur)
2011{
2012 struct inet_connection_sock *icsk;
2013 struct hlist_nulls_node *node;
2014 struct sock *sk = cur;
2015 struct inet_listen_hashbucket *ilb;
2016 struct tcp_iter_state *st = seq->private;
2017 struct net *net = seq_file_net(seq);
2018
2019 if (!sk) {
2020 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2021 spin_lock_bh(&ilb->lock);
2022 sk = sk_nulls_head(&ilb->head);
2023 st->offset = 0;
2024 goto get_sk;
2025 }
2026 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2027 ++st->num;
2028 ++st->offset;
2029
2030 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2031 struct request_sock *req = cur;
2032
2033 icsk = inet_csk(st->syn_wait_sk);
2034 req = req->dl_next;
2035 while (1) {
2036 while (req) {
2037 if (req->rsk_ops->family == st->family) {
2038 cur = req;
2039 goto out;
2040 }
2041 req = req->dl_next;
2042 }
2043 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2044 break;
2045get_req:
2046 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2047 }
2048 sk = sk_nulls_next(st->syn_wait_sk);
2049 st->state = TCP_SEQ_STATE_LISTENING;
2050 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2051 } else {
2052 icsk = inet_csk(sk);
2053 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2054 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2055 goto start_req;
2056 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2057 sk = sk_nulls_next(sk);
2058 }
2059get_sk:
2060 sk_nulls_for_each_from(sk, node) {
2061 if (!net_eq(sock_net(sk), net))
2062 continue;
2063 if (sk->sk_family == st->family) {
2064 cur = sk;
2065 goto out;
2066 }
2067 icsk = inet_csk(sk);
2068 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2069 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2070start_req:
2071 st->uid = sock_i_uid(sk);
2072 st->syn_wait_sk = sk;
2073 st->state = TCP_SEQ_STATE_OPENREQ;
2074 st->sbucket = 0;
2075 goto get_req;
2076 }
2077 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2078 }
2079 spin_unlock_bh(&ilb->lock);
2080 st->offset = 0;
2081 if (++st->bucket < INET_LHTABLE_SIZE) {
2082 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2083 spin_lock_bh(&ilb->lock);
2084 sk = sk_nulls_head(&ilb->head);
2085 goto get_sk;
2086 }
2087 cur = NULL;
2088out:
2089 return cur;
2090}
2091
2092static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2093{
2094 struct tcp_iter_state *st = seq->private;
2095 void *rc;
2096
2097 st->bucket = 0;
2098 st->offset = 0;
2099 rc = listening_get_next(seq, NULL);
2100
2101 while (rc && *pos) {
2102 rc = listening_get_next(seq, rc);
2103 --*pos;
2104 }
2105 return rc;
2106}
2107
2108static inline bool empty_bucket(struct tcp_iter_state *st)
2109{
2110 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2111 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2112}
2113
2114
2115
2116
2117
2118static void *established_get_first(struct seq_file *seq)
2119{
2120 struct tcp_iter_state *st = seq->private;
2121 struct net *net = seq_file_net(seq);
2122 void *rc = NULL;
2123
2124 st->offset = 0;
2125 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2126 struct sock *sk;
2127 struct hlist_nulls_node *node;
2128 struct inet_timewait_sock *tw;
2129 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2130
2131
2132 if (empty_bucket(st))
2133 continue;
2134
2135 spin_lock_bh(lock);
2136 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2137 if (sk->sk_family != st->family ||
2138 !net_eq(sock_net(sk), net)) {
2139 continue;
2140 }
2141 rc = sk;
2142 goto out;
2143 }
2144 st->state = TCP_SEQ_STATE_TIME_WAIT;
2145 inet_twsk_for_each(tw, node,
2146 &tcp_hashinfo.ehash[st->bucket].twchain) {
2147 if (tw->tw_family != st->family ||
2148 !net_eq(twsk_net(tw), net)) {
2149 continue;
2150 }
2151 rc = tw;
2152 goto out;
2153 }
2154 spin_unlock_bh(lock);
2155 st->state = TCP_SEQ_STATE_ESTABLISHED;
2156 }
2157out:
2158 return rc;
2159}
2160
2161static void *established_get_next(struct seq_file *seq, void *cur)
2162{
2163 struct sock *sk = cur;
2164 struct inet_timewait_sock *tw;
2165 struct hlist_nulls_node *node;
2166 struct tcp_iter_state *st = seq->private;
2167 struct net *net = seq_file_net(seq);
2168
2169 ++st->num;
2170 ++st->offset;
2171
2172 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2173 tw = cur;
2174 tw = tw_next(tw);
2175get_tw:
2176 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2177 tw = tw_next(tw);
2178 }
2179 if (tw) {
2180 cur = tw;
2181 goto out;
2182 }
2183 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2184 st->state = TCP_SEQ_STATE_ESTABLISHED;
2185
2186
2187 st->offset = 0;
2188 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2189 empty_bucket(st))
2190 ;
2191 if (st->bucket > tcp_hashinfo.ehash_mask)
2192 return NULL;
2193
2194 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2195 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2196 } else
2197 sk = sk_nulls_next(sk);
2198
2199 sk_nulls_for_each_from(sk, node) {
2200 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2201 goto found;
2202 }
2203
2204 st->state = TCP_SEQ_STATE_TIME_WAIT;
2205 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2206 goto get_tw;
2207found:
2208 cur = sk;
2209out:
2210 return cur;
2211}
2212
2213static void *established_get_idx(struct seq_file *seq, loff_t pos)
2214{
2215 struct tcp_iter_state *st = seq->private;
2216 void *rc;
2217
2218 st->bucket = 0;
2219 rc = established_get_first(seq);
2220
2221 while (rc && pos) {
2222 rc = established_get_next(seq, rc);
2223 --pos;
2224 }
2225 return rc;
2226}
2227
2228static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2229{
2230 void *rc;
2231 struct tcp_iter_state *st = seq->private;
2232
2233 st->state = TCP_SEQ_STATE_LISTENING;
2234 rc = listening_get_idx(seq, &pos);
2235
2236 if (!rc) {
2237 st->state = TCP_SEQ_STATE_ESTABLISHED;
2238 rc = established_get_idx(seq, pos);
2239 }
2240
2241 return rc;
2242}
2243
2244static void *tcp_seek_last_pos(struct seq_file *seq)
2245{
2246 struct tcp_iter_state *st = seq->private;
2247 int offset = st->offset;
2248 int orig_num = st->num;
2249 void *rc = NULL;
2250
2251 switch (st->state) {
2252 case TCP_SEQ_STATE_OPENREQ:
2253 case TCP_SEQ_STATE_LISTENING:
2254 if (st->bucket >= INET_LHTABLE_SIZE)
2255 break;
2256 st->state = TCP_SEQ_STATE_LISTENING;
2257 rc = listening_get_next(seq, NULL);
2258 while (offset-- && rc)
2259 rc = listening_get_next(seq, rc);
2260 if (rc)
2261 break;
2262 st->bucket = 0;
2263
2264 case TCP_SEQ_STATE_ESTABLISHED:
2265 case TCP_SEQ_STATE_TIME_WAIT:
2266 st->state = TCP_SEQ_STATE_ESTABLISHED;
2267 if (st->bucket > tcp_hashinfo.ehash_mask)
2268 break;
2269 rc = established_get_first(seq);
2270 while (offset-- && rc)
2271 rc = established_get_next(seq, rc);
2272 }
2273
2274 st->num = orig_num;
2275
2276 return rc;
2277}
2278
2279static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2280{
2281 struct tcp_iter_state *st = seq->private;
2282 void *rc;
2283
2284 if (*pos && *pos == st->last_pos) {
2285 rc = tcp_seek_last_pos(seq);
2286 if (rc)
2287 goto out;
2288 }
2289
2290 st->state = TCP_SEQ_STATE_LISTENING;
2291 st->num = 0;
2292 st->bucket = 0;
2293 st->offset = 0;
2294 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2295
2296out:
2297 st->last_pos = *pos;
2298 return rc;
2299}
2300
2301static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2302{
2303 struct tcp_iter_state *st = seq->private;
2304 void *rc = NULL;
2305
2306 if (v == SEQ_START_TOKEN) {
2307 rc = tcp_get_idx(seq, 0);
2308 goto out;
2309 }
2310
2311 switch (st->state) {
2312 case TCP_SEQ_STATE_OPENREQ:
2313 case TCP_SEQ_STATE_LISTENING:
2314 rc = listening_get_next(seq, v);
2315 if (!rc) {
2316 st->state = TCP_SEQ_STATE_ESTABLISHED;
2317 st->bucket = 0;
2318 st->offset = 0;
2319 rc = established_get_first(seq);
2320 }
2321 break;
2322 case TCP_SEQ_STATE_ESTABLISHED:
2323 case TCP_SEQ_STATE_TIME_WAIT:
2324 rc = established_get_next(seq, v);
2325 break;
2326 }
2327out:
2328 ++*pos;
2329 st->last_pos = *pos;
2330 return rc;
2331}
2332
2333static void tcp_seq_stop(struct seq_file *seq, void *v)
2334{
2335 struct tcp_iter_state *st = seq->private;
2336
2337 switch (st->state) {
2338 case TCP_SEQ_STATE_OPENREQ:
2339 if (v) {
2340 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2341 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2342 }
2343 case TCP_SEQ_STATE_LISTENING:
2344 if (v != SEQ_START_TOKEN)
2345 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2346 break;
2347 case TCP_SEQ_STATE_TIME_WAIT:
2348 case TCP_SEQ_STATE_ESTABLISHED:
2349 if (v)
2350 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2351 break;
2352 }
2353}
2354
2355int tcp_seq_open(struct inode *inode, struct file *file)
2356{
2357 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2358 struct tcp_iter_state *s;
2359 int err;
2360
2361 err = seq_open_net(inode, file, &afinfo->seq_ops,
2362 sizeof(struct tcp_iter_state));
2363 if (err < 0)
2364 return err;
2365
2366 s = ((struct seq_file *)file->private_data)->private;
2367 s->family = afinfo->family;
2368 s->last_pos = 0;
2369 return 0;
2370}
2371EXPORT_SYMBOL(tcp_seq_open);
2372
2373int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2374{
2375 int rc = 0;
2376 struct proc_dir_entry *p;
2377
2378 afinfo->seq_ops.start = tcp_seq_start;
2379 afinfo->seq_ops.next = tcp_seq_next;
2380 afinfo->seq_ops.stop = tcp_seq_stop;
2381
2382 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2383 afinfo->seq_fops, afinfo);
2384 if (!p)
2385 rc = -ENOMEM;
2386 return rc;
2387}
2388EXPORT_SYMBOL(tcp_proc_register);
2389
2390void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2391{
2392 proc_net_remove(net, afinfo->name);
2393}
2394EXPORT_SYMBOL(tcp_proc_unregister);
2395
2396static void get_openreq4(const struct sock *sk, const struct request_sock *req,
2397 struct seq_file *f, int i, int uid, int *len)
2398{
2399 const struct inet_request_sock *ireq = inet_rsk(req);
2400 int ttd = req->expires - jiffies;
2401
2402 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2403 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
2404 i,
2405 ireq->loc_addr,
2406 ntohs(inet_sk(sk)->inet_sport),
2407 ireq->rmt_addr,
2408 ntohs(ireq->rmt_port),
2409 TCP_SYN_RECV,
2410 0, 0,
2411 1,
2412 jiffies_to_clock_t(ttd),
2413 req->retrans,
2414 uid,
2415 0,
2416 0,
2417 atomic_read(&sk->sk_refcnt),
2418 req,
2419 len);
2420}
2421
2422static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2423{
2424 int timer_active;
2425 unsigned long timer_expires;
2426 const struct tcp_sock *tp = tcp_sk(sk);
2427 const struct inet_connection_sock *icsk = inet_csk(sk);
2428 const struct inet_sock *inet = inet_sk(sk);
2429 __be32 dest = inet->inet_daddr;
2430 __be32 src = inet->inet_rcv_saddr;
2431 __u16 destp = ntohs(inet->inet_dport);
2432 __u16 srcp = ntohs(inet->inet_sport);
2433 int rx_queue;
2434
2435 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2436 timer_active = 1;
2437 timer_expires = icsk->icsk_timeout;
2438 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2439 timer_active = 4;
2440 timer_expires = icsk->icsk_timeout;
2441 } else if (timer_pending(&sk->sk_timer)) {
2442 timer_active = 2;
2443 timer_expires = sk->sk_timer.expires;
2444 } else {
2445 timer_active = 0;
2446 timer_expires = jiffies;
2447 }
2448
2449 if (sk->sk_state == TCP_LISTEN)
2450 rx_queue = sk->sk_ack_backlog;
2451 else
2452
2453
2454
2455 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2456
2457 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2458 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
2459 i, src, srcp, dest, destp, sk->sk_state,
2460 tp->write_seq - tp->snd_una,
2461 rx_queue,
2462 timer_active,
2463 jiffies_to_clock_t(timer_expires - jiffies),
2464 icsk->icsk_retransmits,
2465 sock_i_uid(sk),
2466 icsk->icsk_probes_out,
2467 sock_i_ino(sk),
2468 atomic_read(&sk->sk_refcnt), sk,
2469 jiffies_to_clock_t(icsk->icsk_rto),
2470 jiffies_to_clock_t(icsk->icsk_ack.ato),
2471 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2472 tp->snd_cwnd,
2473 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
2474 len);
2475}
2476
2477static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2478 struct seq_file *f, int i, int *len)
2479{
2480 __be32 dest, src;
2481 __u16 destp, srcp;
2482 int ttd = tw->tw_ttd - jiffies;
2483
2484 if (ttd < 0)
2485 ttd = 0;
2486
2487 dest = tw->tw_daddr;
2488 src = tw->tw_rcv_saddr;
2489 destp = ntohs(tw->tw_dport);
2490 srcp = ntohs(tw->tw_sport);
2491
2492 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2493 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
2494 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2495 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2496 atomic_read(&tw->tw_refcnt), tw, len);
2497}
2498
2499#define TMPSZ 150
2500
2501static int tcp4_seq_show(struct seq_file *seq, void *v)
2502{
2503 struct tcp_iter_state *st;
2504 int len;
2505
2506 if (v == SEQ_START_TOKEN) {
2507 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2508 " sl local_address rem_address st tx_queue "
2509 "rx_queue tr tm->when retrnsmt uid timeout "
2510 "inode");
2511 goto out;
2512 }
2513 st = seq->private;
2514
2515 switch (st->state) {
2516 case TCP_SEQ_STATE_LISTENING:
2517 case TCP_SEQ_STATE_ESTABLISHED:
2518 get_tcp4_sock(v, seq, st->num, &len);
2519 break;
2520 case TCP_SEQ_STATE_OPENREQ:
2521 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
2522 break;
2523 case TCP_SEQ_STATE_TIME_WAIT:
2524 get_timewait4_sock(v, seq, st->num, &len);
2525 break;
2526 }
2527 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
2528out:
2529 return 0;
2530}
2531
2532static const struct file_operations tcp_afinfo_seq_fops = {
2533 .owner = THIS_MODULE,
2534 .open = tcp_seq_open,
2535 .read = seq_read,
2536 .llseek = seq_lseek,
2537 .release = seq_release_net
2538};
2539
2540static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2541 .name = "tcp",
2542 .family = AF_INET,
2543 .seq_fops = &tcp_afinfo_seq_fops,
2544 .seq_ops = {
2545 .show = tcp4_seq_show,
2546 },
2547};
2548
2549static int __net_init tcp4_proc_init_net(struct net *net)
2550{
2551 return tcp_proc_register(net, &tcp4_seq_afinfo);
2552}
2553
2554static void __net_exit tcp4_proc_exit_net(struct net *net)
2555{
2556 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2557}
2558
2559static struct pernet_operations tcp4_net_ops = {
2560 .init = tcp4_proc_init_net,
2561 .exit = tcp4_proc_exit_net,
2562};
2563
2564int __init tcp4_proc_init(void)
2565{
2566 return register_pernet_subsys(&tcp4_net_ops);
2567}
2568
2569void tcp4_proc_exit(void)
2570{
2571 unregister_pernet_subsys(&tcp4_net_ops);
2572}
2573#endif
2574
2575struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2576{
2577 const struct iphdr *iph = skb_gro_network_header(skb);
2578
2579 switch (skb->ip_summed) {
2580 case CHECKSUM_COMPLETE:
2581 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2582 skb->csum)) {
2583 skb->ip_summed = CHECKSUM_UNNECESSARY;
2584 break;
2585 }
2586
2587
2588 case CHECKSUM_NONE:
2589 NAPI_GRO_CB(skb)->flush = 1;
2590 return NULL;
2591 }
2592
2593 return tcp_gro_receive(head, skb);
2594}
2595
2596int tcp4_gro_complete(struct sk_buff *skb)
2597{
2598 const struct iphdr *iph = ip_hdr(skb);
2599 struct tcphdr *th = tcp_hdr(skb);
2600
2601 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2602 iph->saddr, iph->daddr, 0);
2603 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2604
2605 return tcp_gro_complete(skb);
2606}
2607
2608struct proto tcp_prot = {
2609 .name = "TCP",
2610 .owner = THIS_MODULE,
2611 .close = tcp_close,
2612 .connect = tcp_v4_connect,
2613 .disconnect = tcp_disconnect,
2614 .accept = inet_csk_accept,
2615 .ioctl = tcp_ioctl,
2616 .init = tcp_v4_init_sock,
2617 .destroy = tcp_v4_destroy_sock,
2618 .shutdown = tcp_shutdown,
2619 .setsockopt = tcp_setsockopt,
2620 .getsockopt = tcp_getsockopt,
2621 .recvmsg = tcp_recvmsg,
2622 .sendmsg = tcp_sendmsg,
2623 .sendpage = tcp_sendpage,
2624 .backlog_rcv = tcp_v4_do_rcv,
2625 .release_cb = tcp_release_cb,
2626 .mtu_reduced = tcp_v4_mtu_reduced,
2627 .hash = inet_hash,
2628 .unhash = inet_unhash,
2629 .get_port = inet_csk_get_port,
2630 .enter_memory_pressure = tcp_enter_memory_pressure,
2631 .sockets_allocated = &tcp_sockets_allocated,
2632 .orphan_count = &tcp_orphan_count,
2633 .memory_allocated = &tcp_memory_allocated,
2634 .memory_pressure = &tcp_memory_pressure,
2635 .sysctl_wmem = sysctl_tcp_wmem,
2636 .sysctl_rmem = sysctl_tcp_rmem,
2637 .max_header = MAX_TCP_HEADER,
2638 .obj_size = sizeof(struct tcp_sock),
2639 .slab_flags = SLAB_DESTROY_BY_RCU,
2640 .twsk_prot = &tcp_timewait_sock_ops,
2641 .rsk_prot = &tcp_request_sock_ops,
2642 .h.hashinfo = &tcp_hashinfo,
2643 .no_autobind = true,
2644#ifdef CONFIG_COMPAT
2645 .compat_setsockopt = compat_tcp_setsockopt,
2646 .compat_getsockopt = compat_tcp_getsockopt,
2647#endif
2648#ifdef CONFIG_MEMCG_KMEM
2649 .init_cgroup = tcp_init_cgroup,
2650 .destroy_cgroup = tcp_destroy_cgroup,
2651 .proto_cgroup = tcp_proto_cgroup,
2652#endif
2653};
2654EXPORT_SYMBOL(tcp_prot);
2655
2656static int __net_init tcp_sk_init(struct net *net)
2657{
2658 return 0;
2659}
2660
2661static void __net_exit tcp_sk_exit(struct net *net)
2662{
2663}
2664
2665static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2666{
2667 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2668}
2669
2670static struct pernet_operations __net_initdata tcp_sk_ops = {
2671 .init = tcp_sk_init,
2672 .exit = tcp_sk_exit,
2673 .exit_batch = tcp_sk_exit_batch,
2674};
2675
2676void __init tcp_v4_init(void)
2677{
2678 inet_hashinfo_init(&tcp_hashinfo);
2679 if (register_pernet_subsys(&tcp_sk_ops))
2680 panic("Failed to create the TCP control socket.\n");
2681}
2682