/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 */
#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
                                                   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
                               __be32 daddr, __be32 saddr, const struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
        return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                          ip_hdr(skb)->saddr,
                                          tcp_hdr(skb)->dest,
                                          tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
        const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
        struct tcp_sock *tp = tcp_sk(sk);

        /* With PAWS, it is safe from the viewpoint of data integrity.
         * Even without PAWS it is safe provided sequence spaces do not
         * overlap, i.e. at data rates <= 80 Mbit/sec.
         *
         * The idea is close to VJ's: the timestamp cache is held not per
         * host but per port pair, and the TW bucket is used as state
         * holder.  If the TW bucket has already been destroyed we fall
         * back to the initial timestamp retrieved from the peer table.
         */
        if (tcptw->tw_ts_recent_stamp &&
            (twp == NULL || (sysctl_tcp_tw_reuse &&
                             get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
                tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
                if (tp->write_seq == 0)
                        tp->write_seq = 1;
                tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
                tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
                sock_hold(sktw);
                return 1;
        }

        return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
        struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        __be16 orig_sport, orig_dport;
        __be32 daddr, nexthop;
        struct flowi4 *fl4;
        struct rtable *rt;
        int err;
        struct ip_options_rcu *inet_opt;

        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;

        if (usin->sin_family != AF_INET)
                return -EAFNOSUPPORT;

        nexthop = daddr = usin->sin_addr.s_addr;
        inet_opt = rcu_dereference_protected(inet->inet_opt,
                                             sock_owned_by_user(sk));
        if (inet_opt && inet_opt->opt.srr) {
                if (!daddr)
                        return -EINVAL;
                nexthop = inet_opt->opt.faddr;
        }

        orig_sport = inet->inet_sport;
        orig_dport = usin->sin_port;
        fl4 = &inet->cork.fl.u.ip4;
        rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
                              IPPROTO_TCP,
                              orig_sport, orig_dport, sk, true);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                if (err == -ENETUNREACH)
                        IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
                return err;
        }

        if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
                ip_rt_put(rt);
                return -ENETUNREACH;
        }

        if (!inet_opt || !inet_opt->opt.srr)
                daddr = fl4->daddr;

        if (!inet->inet_saddr)
                inet->inet_saddr = fl4->saddr;
        inet->inet_rcv_saddr = inet->inet_saddr;

        if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
                /* Reset inherited state */
                tp->rx_opt.ts_recent       = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                tp->write_seq              = 0;
        }

        if (tcp_death_row.sysctl_tw_recycle &&
            !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
                struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
                /*
                 * VJ's idea. We save the last timestamp seen from
                 * the destination in the peer table when entering
                 * TIME-WAIT, and initialize rx_opt.ts_recent from it
                 * when trying a new connection.
                 */
                if (peer) {
                        inet_peer_refcheck(peer);
                        if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
                                tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
                                tp->rx_opt.ts_recent = peer->tcp_ts;
                        }
                }
        }

        inet->inet_dport = usin->sin_port;
        inet->inet_daddr = daddr;

        inet_csk(sk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

        tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

        /* Socket identity is still unknown (sport may be zero).
         * However we set state to SYN-SENT and, without releasing the
         * socket lock, select a source port, enter ourselves into the
         * hash tables and complete initialization after this.
         */
        tcp_set_state(sk, TCP_SYN_SENT);
        err = inet_hash_connect(&tcp_death_row, sk);
        if (err)
                goto failure;

        rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
                               inet->inet_sport, inet->inet_dport, sk);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                rt = NULL;
                goto failure;
        }
        /* OK, now commit destination to socket. */
        sk->sk_gso_type = SKB_GSO_TCPV4;
        sk_setup_caps(sk, &rt->dst);

        if (!tp->write_seq)
                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
                                                           inet->inet_daddr,
                                                           inet->inet_sport,
                                                           usin->sin_port);

        inet->inet_id = tp->write_seq ^ jiffies;

        err = tcp_connect(sk);
        rt = NULL;
        if (err)
                goto failure;

        return 0;

failure:
        /*
         * This unhashes the socket and releases the local port,
         * if necessary.
         */
        tcp_set_state(sk, TCP_CLOSE);
        ip_rt_put(rt);
        sk->sk_route_caps = 0;
        inet->inet_dport = 0;
        return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine does path MTU discovery as defined in RFC 1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
        struct dst_entry *dst;
        struct inet_sock *inet = inet_sk(sk);

        /* We are not interested in TCP_LISTEN and open_requests
         * (SYN-ACKs sent out by Linux are always < 576 bytes, so
         * they should go through unfragmented).
         */
        if (sk->sk_state == TCP_LISTEN)
                return;

        /* Grab the cached route; if it has been obsoleted there is
         * nothing to update.
         */
        if ((dst = __sk_dst_check(sk, 0)) == NULL)
                return;

        dst->ops->update_pmtu(dst, mtu);

        /* Something is about to be wrong... Remember spot! */
        if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
                sk->sk_err_soft = EMSGSIZE;

        mtu = dst_mtu(dst);

        if (inet->pmtudisc != IP_PMTUDISC_DONT &&
            inet_csk(sk)->icsk_pmtu_cookie > mtu) {
                tcp_sync_mss(sk, mtu);

                /* Resend the unfragmentable segments right away with
                 * the new, smaller MSS; the old packets were clearly
                 * dropped.
                 */
                tcp_simple_retransmit(sk);
        }
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic".  When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */
348void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
349{
350 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
351 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
352 struct inet_connection_sock *icsk;
353 struct tcp_sock *tp;
354 struct inet_sock *inet;
355 const int type = icmp_hdr(icmp_skb)->type;
356 const int code = icmp_hdr(icmp_skb)->code;
357 struct sock *sk;
358 struct sk_buff *skb;
359 __u32 seq;
360 __u32 remaining;
361 int err;
362 struct net *net = dev_net(icmp_skb->dev);
363
364 if (icmp_skb->len < (iph->ihl << 2) + 8) {
365 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
366 return;
367 }
368
369 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
370 iph->saddr, th->source, inet_iif(icmp_skb));
371 if (!sk) {
372 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
373 return;
374 }
375 if (sk->sk_state == TCP_TIME_WAIT) {
376 inet_twsk_put(inet_twsk(sk));
377 return;
378 }
379
380 bh_lock_sock(sk);
381
382
383
384 if (sock_owned_by_user(sk))
385 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
386
387 if (sk->sk_state == TCP_CLOSE)
388 goto out;
389
390 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
391 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
392 goto out;
393 }
394
395 icsk = inet_csk(sk);
396 tp = tcp_sk(sk);
397 seq = ntohl(th->seq);
398 if (sk->sk_state != TCP_LISTEN &&
399 !between(seq, tp->snd_una, tp->snd_nxt)) {
400 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
401 goto out;
402 }
403
404 switch (type) {
405 case ICMP_SOURCE_QUENCH:
406
407 goto out;
408 case ICMP_PARAMETERPROB:
409 err = EPROTO;
410 break;
411 case ICMP_DEST_UNREACH:
412 if (code > NR_ICMP_UNREACH)
413 goto out;
414
415 if (code == ICMP_FRAG_NEEDED) {
416 if (!sock_owned_by_user(sk))
417 do_pmtu_discovery(sk, iph, info);
418 goto out;
419 }
420
421 err = icmp_err_convert[code].errno;
422
423
424 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
425 break;
426 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
427 !icsk->icsk_backoff)
428 break;
429
430 if (sock_owned_by_user(sk))
431 break;
432
433 icsk->icsk_backoff--;
434 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
435 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
436 tcp_bound_rto(sk);
437
438 skb = tcp_write_queue_head(sk);
439 BUG_ON(!skb);
440
441 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
442 tcp_time_stamp - TCP_SKB_CB(skb)->when);
443
444 if (remaining) {
445 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
446 remaining, TCP_RTO_MAX);
447 } else {
448
449
450 tcp_retransmit_timer(sk);
451 }
452
453 break;
454 case ICMP_TIME_EXCEEDED:
455 err = EHOSTUNREACH;
456 break;
457 default:
458 goto out;
459 }
460
461 switch (sk->sk_state) {
462 struct request_sock *req, **prev;
463 case TCP_LISTEN:
464 if (sock_owned_by_user(sk))
465 goto out;
466
467 req = inet_csk_search_req(sk, &prev, th->dest,
468 iph->daddr, iph->saddr);
469 if (!req)
470 goto out;
471
472
473
474
475 WARN_ON(req->sk);
476
477 if (seq != tcp_rsk(req)->snt_isn) {
478 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
479 goto out;
480 }
481
482
483
484
485
486
487
488 inet_csk_reqsk_queue_drop(sk, req, prev);
489 goto out;
490
491 case TCP_SYN_SENT:
492 case TCP_SYN_RECV:
493
494
495 if (!sock_owned_by_user(sk)) {
496 sk->sk_err = err;
497
498 sk->sk_error_report(sk);
499
500 tcp_done(sk);
501 } else {
502 sk->sk_err_soft = err;
503 }
504 goto out;
505 }

        /* If we've already connected we will keep trying
         * until we time out, or the user gives up.
         *
         * RFC 1122 4.2.3.9 allows to consider as hard errors
         * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
         * but it is obsoleted by PMTU discovery).
         */
523 inet = inet_sk(sk);
524 if (!sock_owned_by_user(sk) && inet->recverr) {
525 sk->sk_err = err;
526 sk->sk_error_report(sk);
527 } else {
528 sk->sk_err_soft = err;
529 }
530
531out:
532 bh_unlock_sock(sk);
533 sock_put(sk);
534}

static void __tcp_v4_send_check(struct sk_buff *skb,
                                __be32 saddr, __be32 daddr)
{
        struct tcphdr *th = tcp_hdr(skb);

        if (skb->ip_summed == CHECKSUM_PARTIAL) {
                th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
                skb->csum_start = skb_transport_header(skb) - skb->head;
                skb->csum_offset = offsetof(struct tcphdr, check);
        } else {
                th->check = tcp_v4_check(skb->len, saddr, daddr,
                                         csum_partial(th,
                                                      th->doff << 2,
                                                      skb->csum));
        }
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
        const struct inet_sock *inet = inet_sk(sk);

        __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

/* Prepare the checksum offload fields of a GSO TCP segment. */
int tcp_v4_gso_send_check(struct sk_buff *skb)
{
        const struct iphdr *iph;
        struct tcphdr *th;

        if (!pskb_may_pull(skb, sizeof(*th)))
                return -EINVAL;

        iph = ip_hdr(skb);
        th = tcp_hdr(skb);

        th->check = 0;
        skb->ip_summed = CHECKSUM_PARTIAL;
        __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
        return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		provided by the sender in the original packet.
 *
 *	Exception: precedence violation. We do not implement it in any case.
 */
592static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
593{
594 const struct tcphdr *th = tcp_hdr(skb);
595 struct {
596 struct tcphdr th;
597#ifdef CONFIG_TCP_MD5SIG
598 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
599#endif
600 } rep;
601 struct ip_reply_arg arg;
602#ifdef CONFIG_TCP_MD5SIG
603 struct tcp_md5sig_key *key;
604#endif
605 struct net *net;
606
607
608 if (th->rst)
609 return;
610
611 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
612 return;
613
614
615 memset(&rep, 0, sizeof(rep));
616 rep.th.dest = th->source;
617 rep.th.source = th->dest;
618 rep.th.doff = sizeof(struct tcphdr) / 4;
619 rep.th.rst = 1;
620
621 if (th->ack) {
622 rep.th.seq = th->ack_seq;
623 } else {
624 rep.th.ack = 1;
625 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
626 skb->len - (th->doff << 2));
627 }
628
629 memset(&arg, 0, sizeof(arg));
630 arg.iov[0].iov_base = (unsigned char *)&rep;
631 arg.iov[0].iov_len = sizeof(rep.th);
632
633#ifdef CONFIG_TCP_MD5SIG
634 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
635 if (key) {
636 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
637 (TCPOPT_NOP << 16) |
638 (TCPOPT_MD5SIG << 8) |
639 TCPOLEN_MD5SIG);
640
641 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
642 rep.th.doff = arg.iov[0].iov_len / 4;
643
644 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
645 key, ip_hdr(skb)->saddr,
646 ip_hdr(skb)->daddr, &rep.th);
647 }
648#endif
649 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
650 ip_hdr(skb)->saddr,
651 arg.iov[0].iov_len, IPPROTO_TCP, 0);
652 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
653 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
654
655
656
657
658 arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
659
660 net = dev_net(skb_dst(skb)->dev);
661 arg.tos = ip_hdr(skb)->tos;
662 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
663 &arg, arg.iov[0].iov_len);
664
665 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
666 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
667}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
 * outside socket context is ugly, certainly. What can I do?
 */
673static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
674 u32 win, u32 ts, int oif,
675 struct tcp_md5sig_key *key,
676 int reply_flags, u8 tos)
677{
678 const struct tcphdr *th = tcp_hdr(skb);
679 struct {
680 struct tcphdr th;
681 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
682#ifdef CONFIG_TCP_MD5SIG
683 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
684#endif
685 ];
686 } rep;
687 struct ip_reply_arg arg;
688 struct net *net = dev_net(skb_dst(skb)->dev);
689
690 memset(&rep.th, 0, sizeof(struct tcphdr));
691 memset(&arg, 0, sizeof(arg));
692
693 arg.iov[0].iov_base = (unsigned char *)&rep;
694 arg.iov[0].iov_len = sizeof(rep.th);
695 if (ts) {
696 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
697 (TCPOPT_TIMESTAMP << 8) |
698 TCPOLEN_TIMESTAMP);
699 rep.opt[1] = htonl(tcp_time_stamp);
700 rep.opt[2] = htonl(ts);
701 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
702 }
703
704
705 rep.th.dest = th->source;
706 rep.th.source = th->dest;
707 rep.th.doff = arg.iov[0].iov_len / 4;
708 rep.th.seq = htonl(seq);
709 rep.th.ack_seq = htonl(ack);
710 rep.th.ack = 1;
711 rep.th.window = htons(win);
712
713#ifdef CONFIG_TCP_MD5SIG
714 if (key) {
715 int offset = (ts) ? 3 : 0;
716
717 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
718 (TCPOPT_NOP << 16) |
719 (TCPOPT_MD5SIG << 8) |
720 TCPOLEN_MD5SIG);
721 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
722 rep.th.doff = arg.iov[0].iov_len/4;
723
724 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
725 key, ip_hdr(skb)->saddr,
726 ip_hdr(skb)->daddr, &rep.th);
727 }
728#endif
729 arg.flags = reply_flags;
730 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
731 ip_hdr(skb)->saddr,
732 arg.iov[0].iov_len, IPPROTO_TCP, 0);
733 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
734 if (oif)
735 arg.bound_dev_if = oif;
736 arg.tos = tos;
737 ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
738 &arg, arg.iov[0].iov_len);
739
740 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
741}
742
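/* Send an ACK for a segment that hit a TIME-WAIT socket, using the
 * sequence numbers and timestamp state cached in the timewait bucket.
 */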
743static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
744{
745 struct inet_timewait_sock *tw = inet_twsk(sk);
746 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
747
748 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
749 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
750 tcptw->tw_ts_recent,
751 tw->tw_bound_dev_if,
752 tcp_twsk_md5_key(tcptw),
753 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
754 tw->tw_tos
755 );
756
757 inet_twsk_put(tw);
758}
759
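/* ACK a segment for a connection still in SYN-RECV state, i.e. one
 * that only has a request_sock and no full socket yet.
 */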
760static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
761 struct request_sock *req)
762{
763 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
764 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
765 req->ts_recent,
766 0,
767 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
768 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
769 ip_hdr(skb)->tos);
770}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
777static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
778 struct request_sock *req,
779 struct request_values *rvp)
780{
781 const struct inet_request_sock *ireq = inet_rsk(req);
782 struct flowi4 fl4;
783 int err = -1;
784 struct sk_buff * skb;
785
786
787 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
788 return -1;
789
790 skb = tcp_make_synack(sk, dst, req, rvp);
791
792 if (skb) {
793 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
794
795 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
796 ireq->rmt_addr,
797 ireq->opt);
798 err = net_xmit_eval(err);
799 }
800
801 dst_release(dst);
802 return err;
803}
804
805static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
806 struct request_values *rvp)
807{
808 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
809 return tcp_v4_send_synack(sk, NULL, req, rvp);
810}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
        kfree(inet_rsk(req)->opt);
}

/*
 * Return 1 if a syn cookie should be sent.
 */
823int tcp_syn_flood_action(struct sock *sk,
824 const struct sk_buff *skb,
825 const char *proto)
826{
827 const char *msg = "Dropping request";
828 int want_cookie = 0;
829 struct listen_sock *lopt;
830
831
832
833#ifdef CONFIG_SYN_COOKIES
834 if (sysctl_tcp_syncookies) {
835 msg = "Sending cookies";
836 want_cookie = 1;
837 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
838 } else
839#endif
840 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
841
842 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
843 if (!lopt->synflood_warned) {
844 lopt->synflood_warned = 1;
                pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
                        proto, ntohs(tcp_hdr(skb)->dest), msg);
848 }
849 return want_cookie;
850}
851EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
856static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
857 struct sk_buff *skb)
858{
859 const struct ip_options *opt = &(IPCB(skb)->opt);
860 struct ip_options_rcu *dopt = NULL;
861
862 if (opt && opt->optlen) {
863 int opt_size = sizeof(*dopt) + opt->optlen;
864
865 dopt = kmalloc(opt_size, GFP_ATOMIC);
866 if (dopt) {
867 if (ip_options_echo(&dopt->opt, skb)) {
868 kfree(dopt);
869 dopt = NULL;
870 }
871 }
872 }
873 return dopt;
874}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC 2385 MD5 checksumming requires a mapping of
 * IP address -> MD5 key.
 * We need to maintain these in the sk structure.
 */

884static struct tcp_md5sig_key *
885 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
886{
887 struct tcp_sock *tp = tcp_sk(sk);
888 int i;
889
890 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
891 return NULL;
892 for (i = 0; i < tp->md5sig_info->entries4; i++) {
893 if (tp->md5sig_info->keys4[i].addr == addr)
894 return &tp->md5sig_info->keys4[i].base;
895 }
896 return NULL;
897}
898
899struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
900 struct sock *addr_sk)
901{
902 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
903}
904EXPORT_SYMBOL(tcp_v4_md5_lookup);
905
906static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
907 struct request_sock *req)
908{
909 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
910}
911
912
913int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
914 u8 *newkey, u8 newkeylen)
915{
916
917 struct tcp_md5sig_key *key;
918 struct tcp_sock *tp = tcp_sk(sk);
919 struct tcp4_md5sig_key *keys;
920
921 key = tcp_v4_md5_do_lookup(sk, addr);
922 if (key) {
923
924 kfree(key->key);
925 key->key = newkey;
926 key->keylen = newkeylen;
927 } else {
928 struct tcp_md5sig_info *md5sig;
929
930 if (!tp->md5sig_info) {
931 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
932 GFP_ATOMIC);
933 if (!tp->md5sig_info) {
934 kfree(newkey);
935 return -ENOMEM;
936 }
937 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
938 }
939
940 md5sig = tp->md5sig_info;
941 if (md5sig->entries4 == 0 &&
942 tcp_alloc_md5sig_pool(sk) == NULL) {
943 kfree(newkey);
944 return -ENOMEM;
945 }
946
947 if (md5sig->alloced4 == md5sig->entries4) {
948 keys = kmalloc((sizeof(*keys) *
949 (md5sig->entries4 + 1)), GFP_ATOMIC);
950 if (!keys) {
951 kfree(newkey);
952 if (md5sig->entries4 == 0)
953 tcp_free_md5sig_pool();
954 return -ENOMEM;
955 }
956
957 if (md5sig->entries4)
958 memcpy(keys, md5sig->keys4,
959 sizeof(*keys) * md5sig->entries4);
960
961
962 kfree(md5sig->keys4);
963 md5sig->keys4 = keys;
964 md5sig->alloced4++;
965 }
966 md5sig->entries4++;
967 md5sig->keys4[md5sig->entries4 - 1].addr = addr;
968 md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
969 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
970 }
971 return 0;
972}
973EXPORT_SYMBOL(tcp_v4_md5_do_add);
974
975static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
976 u8 *newkey, u8 newkeylen)
977{
978 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
979 newkey, newkeylen);
980}
981
982int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
983{
984 struct tcp_sock *tp = tcp_sk(sk);
985 int i;
986
987 for (i = 0; i < tp->md5sig_info->entries4; i++) {
988 if (tp->md5sig_info->keys4[i].addr == addr) {
989
990 kfree(tp->md5sig_info->keys4[i].base.key);
991 tp->md5sig_info->entries4--;
992
993 if (tp->md5sig_info->entries4 == 0) {
994 kfree(tp->md5sig_info->keys4);
995 tp->md5sig_info->keys4 = NULL;
996 tp->md5sig_info->alloced4 = 0;
997 tcp_free_md5sig_pool();
998 } else if (tp->md5sig_info->entries4 != i) {
999
1000 memmove(&tp->md5sig_info->keys4[i],
1001 &tp->md5sig_info->keys4[i+1],
1002 (tp->md5sig_info->entries4 - i) *
1003 sizeof(struct tcp4_md5sig_key));
1004 }
1005 return 0;
1006 }
1007 }
1008 return -ENOENT;
1009}
1010EXPORT_SYMBOL(tcp_v4_md5_do_del);
1011
1012static void tcp_v4_clear_md5_list(struct sock *sk)
1013{
1014 struct tcp_sock *tp = tcp_sk(sk);
1015

        /* Free each key, then the set of keys, the crypto
         * element, and then decrement our pool usage.
         */
1020 if (tp->md5sig_info->entries4) {
1021 int i;
1022 for (i = 0; i < tp->md5sig_info->entries4; i++)
1023 kfree(tp->md5sig_info->keys4[i].base.key);
1024 tp->md5sig_info->entries4 = 0;
1025 tcp_free_md5sig_pool();
1026 }
1027 if (tp->md5sig_info->keys4) {
1028 kfree(tp->md5sig_info->keys4);
1029 tp->md5sig_info->keys4 = NULL;
1030 tp->md5sig_info->alloced4 = 0;
1031 }
1032}
1033
1034static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1035 int optlen)
1036{
1037 struct tcp_md5sig cmd;
1038 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1039 u8 *newkey;
1040
1041 if (optlen < sizeof(cmd))
1042 return -EINVAL;
1043
1044 if (copy_from_user(&cmd, optval, sizeof(cmd)))
1045 return -EFAULT;
1046
1047 if (sin->sin_family != AF_INET)
1048 return -EINVAL;
1049
1050 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1051 if (!tcp_sk(sk)->md5sig_info)
1052 return -ENOENT;
1053 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1054 }
1055
1056 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1057 return -EINVAL;
1058
1059 if (!tcp_sk(sk)->md5sig_info) {
1060 struct tcp_sock *tp = tcp_sk(sk);
1061 struct tcp_md5sig_info *p;
1062
1063 p = kzalloc(sizeof(*p), sk->sk_allocation);
1064 if (!p)
1065 return -EINVAL;
1066
1067 tp->md5sig_info = p;
1068 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1069 }
1070
1071 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
1072 if (!newkey)
1073 return -ENOMEM;
1074 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1075 newkey, cmd.tcpm_keylen);
1076}
1077
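/* Feed the IPv4 TCP pseudo-header (addresses, protocol, length) into
 * the running MD5 hash.
 */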
1078static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1079 __be32 daddr, __be32 saddr, int nbytes)
1080{
1081 struct tcp4_pseudohdr *bp;
1082 struct scatterlist sg;
1083
1084 bp = &hp->md5_blk.ip4;
1085
1086
1087
1088
1089
1090
1091 bp->saddr = saddr;
1092 bp->daddr = daddr;
1093 bp->pad = 0;
1094 bp->protocol = IPPROTO_TCP;
1095 bp->len = cpu_to_be16(nbytes);
1096
1097 sg_init_one(&sg, bp, sizeof(*bp));
1098 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1099}
1100
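/* Compute the TCP MD5 signature over the pseudo-header, TCP header and
 * key only (no payload); used for replies built without a full socket.
 */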
1101static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1102 __be32 daddr, __be32 saddr, const struct tcphdr *th)
1103{
1104 struct tcp_md5sig_pool *hp;
1105 struct hash_desc *desc;
1106
1107 hp = tcp_get_md5sig_pool();
1108 if (!hp)
1109 goto clear_hash_noput;
1110 desc = &hp->md5_desc;
1111
1112 if (crypto_hash_init(desc))
1113 goto clear_hash;
1114 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1115 goto clear_hash;
1116 if (tcp_md5_hash_header(hp, th))
1117 goto clear_hash;
1118 if (tcp_md5_hash_key(hp, key))
1119 goto clear_hash;
1120 if (crypto_hash_final(desc, md5_hash))
1121 goto clear_hash;
1122
1123 tcp_put_md5sig_pool();
1124 return 0;
1125
1126clear_hash:
1127 tcp_put_md5sig_pool();
1128clear_hash_noput:
1129 memset(md5_hash, 0, 16);
1130 return 1;
1131}
1132
1133int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1134 const struct sock *sk, const struct request_sock *req,
1135 const struct sk_buff *skb)
1136{
1137 struct tcp_md5sig_pool *hp;
1138 struct hash_desc *desc;
1139 const struct tcphdr *th = tcp_hdr(skb);
1140 __be32 saddr, daddr;
1141
1142 if (sk) {
1143 saddr = inet_sk(sk)->inet_saddr;
1144 daddr = inet_sk(sk)->inet_daddr;
1145 } else if (req) {
1146 saddr = inet_rsk(req)->loc_addr;
1147 daddr = inet_rsk(req)->rmt_addr;
1148 } else {
1149 const struct iphdr *iph = ip_hdr(skb);
1150 saddr = iph->saddr;
1151 daddr = iph->daddr;
1152 }
1153
1154 hp = tcp_get_md5sig_pool();
1155 if (!hp)
1156 goto clear_hash_noput;
1157 desc = &hp->md5_desc;
1158
1159 if (crypto_hash_init(desc))
1160 goto clear_hash;
1161
1162 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1163 goto clear_hash;
1164 if (tcp_md5_hash_header(hp, th))
1165 goto clear_hash;
1166 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1167 goto clear_hash;
1168 if (tcp_md5_hash_key(hp, key))
1169 goto clear_hash;
1170 if (crypto_hash_final(desc, md5_hash))
1171 goto clear_hash;
1172
1173 tcp_put_md5sig_pool();
1174 return 0;
1175
1176clear_hash:
1177 tcp_put_md5sig_pool();
1178clear_hash_noput:
1179 memset(md5_hash, 0, 16);
1180 return 1;
1181}
1182EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
        /*
         * This gets called for each TCP segment that arrives,
         * so we want to be efficient.
         * We have 3 drop cases:
         * o No MD5 hash and one expected.
         * o MD5 hash and we're not expecting one.
         * o MD5 hash and it's wrong.
         */
1194 const __u8 *hash_location = NULL;
1195 struct tcp_md5sig_key *hash_expected;
1196 const struct iphdr *iph = ip_hdr(skb);
1197 const struct tcphdr *th = tcp_hdr(skb);
1198 int genhash;
1199 unsigned char newhash[16];
1200
1201 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
1202 hash_location = tcp_parse_md5sig_option(th);
1203
1204
1205 if (!hash_expected && !hash_location)
1206 return 0;
1207
1208 if (hash_expected && !hash_location) {
1209 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1210 return 1;
1211 }
1212
1213 if (!hash_expected && hash_location) {
1214 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1215 return 1;
1216 }
1217
1218
1219
1220
1221 genhash = tcp_v4_md5_hash_skb(newhash,
1222 hash_expected,
1223 NULL, NULL, skb);
1224
1225 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1226 if (net_ratelimit()) {
1227 printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1228 &iph->saddr, ntohs(th->source),
1229 &iph->daddr, ntohs(th->dest),
1230 genhash ? " tcp_v4_calc_md5_hash failed" : "");
1231 }
1232 return 1;
1233 }
1234 return 0;
1235}
1236
1237#endif
1238
1239struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1240 .family = PF_INET,
1241 .obj_size = sizeof(struct tcp_request_sock),
1242 .rtx_syn_ack = tcp_v4_rtx_synack,
1243 .send_ack = tcp_v4_reqsk_send_ack,
1244 .destructor = tcp_v4_reqsk_destructor,
1245 .send_reset = tcp_v4_send_reset,
1246 .syn_ack_timeout = tcp_syn_ack_timeout,
1247};
1248
1249#ifdef CONFIG_TCP_MD5SIG
1250static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1251 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1252 .calc_md5_hash = tcp_v4_md5_hash_skb,
1253};
1254#endif
1255
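/* Handle an incoming SYN on a listening socket: allocate a request_sock,
 * parse the options, pick an initial sequence number (or a SYN cookie)
 * and answer with a SYN-ACK.
 */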
1256int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1257{
1258 struct tcp_extend_values tmp_ext;
1259 struct tcp_options_received tmp_opt;
1260 const u8 *hash_location;
1261 struct request_sock *req;
1262 struct inet_request_sock *ireq;
1263 struct tcp_sock *tp = tcp_sk(sk);
1264 struct dst_entry *dst = NULL;
1265 __be32 saddr = ip_hdr(skb)->saddr;
1266 __be32 daddr = ip_hdr(skb)->daddr;
1267 __u32 isn = TCP_SKB_CB(skb)->when;
1268 int want_cookie = 0;
1269
1270
1271 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1272 goto drop;
1273
1274
1275
1276
1277
1278 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1279 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1280 if (!want_cookie)
1281 goto drop;
1282 }
1283
1284
1285
1286
1287
1288
1289 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1290 goto drop;
1291
1292 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1293 if (!req)
1294 goto drop;
1295
1296#ifdef CONFIG_TCP_MD5SIG
1297 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1298#endif
1299
1300 tcp_clear_options(&tmp_opt);
1301 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1302 tmp_opt.user_mss = tp->rx_opt.user_mss;
1303 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
1304
1305 if (tmp_opt.cookie_plus > 0 &&
1306 tmp_opt.saw_tstamp &&
1307 !tp->rx_opt.cookie_out_never &&
1308 (sysctl_tcp_cookie_size > 0 ||
1309 (tp->cookie_values != NULL &&
1310 tp->cookie_values->cookie_desired > 0))) {
1311 u8 *c;
1312 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1313 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1314
1315 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1316 goto drop_and_release;
1317
1318
1319 *mess++ ^= (__force u32)daddr;
1320 *mess++ ^= (__force u32)saddr;
1321
1322
1323 c = (u8 *)mess;
1324 while (l-- > 0)
1325 *c++ ^= *hash_location++;
1326
1327 want_cookie = 0;
1328 tmp_ext.cookie_out_never = 0;
1329 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1330 } else if (!tp->rx_opt.cookie_in_always) {
1331
1332 tmp_ext.cookie_out_never = 1;
1333 tmp_ext.cookie_plus = 0;
1334 } else {
1335 goto drop_and_release;
1336 }
1337 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1338
1339 if (want_cookie && !tmp_opt.saw_tstamp)
1340 tcp_clear_options(&tmp_opt);
1341
1342 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1343 tcp_openreq_init(req, &tmp_opt, skb);
1344
1345 ireq = inet_rsk(req);
1346 ireq->loc_addr = daddr;
1347 ireq->rmt_addr = saddr;
1348 ireq->no_srccheck = inet_sk(sk)->transparent;
1349 ireq->opt = tcp_v4_save_options(sk, skb);
1350
1351 if (security_inet_conn_request(sk, skb, req))
1352 goto drop_and_free;
1353
1354 if (!want_cookie || tmp_opt.tstamp_ok)
1355 TCP_ECN_create_request(req, tcp_hdr(skb));
1356
1357 if (want_cookie) {
1358 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1359 req->cookie_ts = tmp_opt.tstamp_ok;
1360 } else if (!isn) {
1361 struct inet_peer *peer = NULL;
1362 struct flowi4 fl4;

                /* VJ's idea. We save the last timestamp seen
                 * from the destination in the peer table, when entering
                 * state TIME-WAIT, and check against it before
                 * accepting a new connection request.
                 *
                 * If "isn" is not zero, this request hit an alive
                 * timewait bucket, so all the necessary checks
                 * were already made while processing timewait state.
                 */
1373 if (tmp_opt.saw_tstamp &&
1374 tcp_death_row.sysctl_tw_recycle &&
1375 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
1376 fl4.daddr == saddr &&
1377 (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
1378 inet_peer_refcheck(peer);
1379 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1380 (s32)(peer->tcp_ts - req->ts_recent) >
1381 TCP_PAWS_WINDOW) {
1382 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1383 goto drop_and_release;
1384 }
1385 }
1386
1387 else if (!sysctl_tcp_syncookies &&
1388 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1389 (sysctl_max_syn_backlog >> 2)) &&
1390 (!peer || !peer->tcp_ts_stamp) &&
1391 (!dst || !dst_metric(dst, RTAX_RTT))) {
1392
1393
1394
1395
1396
1397
1398
1399 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1400 &saddr, ntohs(tcp_hdr(skb)->source));
1401 goto drop_and_release;
1402 }
1403
1404 isn = tcp_v4_init_sequence(skb);
1405 }
1406 tcp_rsk(req)->snt_isn = isn;
1407 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1408
1409 if (tcp_v4_send_synack(sk, dst, req,
1410 (struct request_values *)&tmp_ext) ||
1411 want_cookie)
1412 goto drop_and_free;
1413
1414 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1415 return 0;
1416
1417drop_and_release:
1418 dst_release(dst);
1419drop_and_free:
1420 reqsk_free(req);
1421drop:
1422 return 0;
1423}
1424EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
1431struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1432 struct request_sock *req,
1433 struct dst_entry *dst)
1434{
1435 struct inet_request_sock *ireq;
1436 struct inet_sock *newinet;
1437 struct tcp_sock *newtp;
1438 struct sock *newsk;
1439#ifdef CONFIG_TCP_MD5SIG
1440 struct tcp_md5sig_key *key;
1441#endif
1442 struct ip_options_rcu *inet_opt;
1443
1444 if (sk_acceptq_is_full(sk))
1445 goto exit_overflow;
1446
1447 newsk = tcp_create_openreq_child(sk, req, skb);
1448 if (!newsk)
1449 goto exit_nonewsk;
1450
1451 newsk->sk_gso_type = SKB_GSO_TCPV4;
1452
1453 newtp = tcp_sk(newsk);
1454 newinet = inet_sk(newsk);
1455 ireq = inet_rsk(req);
1456 newinet->inet_daddr = ireq->rmt_addr;
1457 newinet->inet_rcv_saddr = ireq->loc_addr;
1458 newinet->inet_saddr = ireq->loc_addr;
1459 inet_opt = ireq->opt;
1460 rcu_assign_pointer(newinet->inet_opt, inet_opt);
1461 ireq->opt = NULL;
1462 newinet->mc_index = inet_iif(skb);
1463 newinet->mc_ttl = ip_hdr(skb)->ttl;
1464 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1465 if (inet_opt)
1466 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1467 newinet->inet_id = newtp->write_seq ^ jiffies;
1468
1469 if (!dst) {
1470 dst = inet_csk_route_child_sock(sk, newsk, req);
1471 if (!dst)
1472 goto put_and_exit;
1473 } else {
1474
1475 }
1476 sk_setup_caps(newsk, dst);
1477
1478 tcp_mtup_init(newsk);
1479 tcp_sync_mss(newsk, dst_mtu(dst));
1480 newtp->advmss = dst_metric_advmss(dst);
1481 if (tcp_sk(sk)->rx_opt.user_mss &&
1482 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1483 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1484
1485 tcp_initialize_rcv_mss(newsk);
1486 if (tcp_rsk(req)->snt_synack)
1487 tcp_valid_rtt_meas(newsk,
1488 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1489 newtp->total_retrans = req->retrans;
1490
1491#ifdef CONFIG_TCP_MD5SIG
1492
1493 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1494 if (key != NULL) {
1495
1496
1497
1498
1499
1500
1501 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1502 if (newkey != NULL)
1503 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1504 newkey, key->keylen);
1505 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1506 }
1507#endif
1508
1509 if (__inet_inherit_port(sk, newsk) < 0)
1510 goto put_and_exit;
1511 __inet_hash_nolisten(newsk, NULL);
1512
1513 return newsk;
1514
1515exit_overflow:
1516 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1517exit_nonewsk:
1518 dst_release(dst);
1519exit:
1520 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1521 return NULL;
1522put_and_exit:
1523 tcp_clear_xmit_timers(newsk);
1524 tcp_cleanup_congestion_control(newsk);
1525 bh_unlock_sock(newsk);
1526 sock_put(newsk);
1527 goto exit;
1528}
1529EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1530
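/* For a segment arriving on a listening socket, look up a matching
 * request_sock or an already established child socket; for non-SYN
 * segments, fall back to SYN-cookie validation when enabled.
 */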
1531static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1532{
1533 struct tcphdr *th = tcp_hdr(skb);
1534 const struct iphdr *iph = ip_hdr(skb);
1535 struct sock *nsk;
1536 struct request_sock **prev;
1537
1538 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1539 iph->saddr, iph->daddr);
1540 if (req)
1541 return tcp_check_req(sk, skb, req, prev);
1542
1543 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
1544 th->source, iph->daddr, th->dest, inet_iif(skb));
1545
1546 if (nsk) {
1547 if (nsk->sk_state != TCP_TIME_WAIT) {
1548 bh_lock_sock(nsk);
1549 return nsk;
1550 }
1551 inet_twsk_put(inet_twsk(nsk));
1552 return NULL;
1553 }
1554
1555#ifdef CONFIG_SYN_COOKIES
1556 if (!th->syn)
1557 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1558#endif
1559 return sk;
1560}
1561
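/* Verify the TCP checksum of an incoming segment where possible; short
 * packets are checked immediately, larger ones keep the pseudo-header
 * sum in skb->csum for later completion.
 */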
1562static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
1563{
1564 const struct iphdr *iph = ip_hdr(skb);
1565
1566 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1567 if (!tcp_v4_check(skb->len, iph->saddr,
1568 iph->daddr, skb->csum)) {
1569 skb->ip_summed = CHECKSUM_UNNECESSARY;
1570 return 0;
1571 }
1572 }
1573
1574 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
1575 skb->len, IPPROTO_TCP, 0);
1576
1577 if (skb->len <= 76) {
1578 return __skb_checksum_complete(skb);
1579 }
1580 return 0;
1581}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1592int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1593{
1594 struct sock *rsk;
1595#ifdef CONFIG_TCP_MD5SIG
1596
1597
1598
1599
1600
1601
1602 if (tcp_v4_inbound_md5_hash(sk, skb))
1603 goto discard;
1604#endif
1605
1606 if (sk->sk_state == TCP_ESTABLISHED) {
1607 sock_rps_save_rxhash(sk, skb);
1608 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1609 rsk = sk;
1610 goto reset;
1611 }
1612 return 0;
1613 }
1614
1615 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1616 goto csum_err;
1617
1618 if (sk->sk_state == TCP_LISTEN) {
1619 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1620 if (!nsk)
1621 goto discard;
1622
1623 if (nsk != sk) {
1624 sock_rps_save_rxhash(nsk, skb);
1625 if (tcp_child_process(sk, nsk, skb)) {
1626 rsk = nsk;
1627 goto reset;
1628 }
1629 return 0;
1630 }
1631 } else
1632 sock_rps_save_rxhash(sk, skb);
1633
1634 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
1635 rsk = sk;
1636 goto reset;
1637 }
1638 return 0;
1639
1640reset:
1641 tcp_v4_send_reset(rsk, skb);
1642discard:
1643 kfree_skb(skb);
1644
1645
1646
1647
1648
1649 return 0;
1650
1651csum_err:
1652 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1653 goto discard;
1654}
1655EXPORT_SYMBOL(tcp_v4_do_rcv);
1661int tcp_v4_rcv(struct sk_buff *skb)
1662{
1663 const struct iphdr *iph;
1664 const struct tcphdr *th;
1665 struct sock *sk;
1666 int ret;
1667 struct net *net = dev_net(skb->dev);
1668
1669 if (skb->pkt_type != PACKET_HOST)
1670 goto discard_it;
1671
1672
1673 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1674
1675 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1676 goto discard_it;
1677
1678 th = tcp_hdr(skb);
1679
1680 if (th->doff < sizeof(struct tcphdr) / 4)
1681 goto bad_packet;
1682 if (!pskb_may_pull(skb, th->doff * 4))
1683 goto discard_it;
1684
1685
1686
1687
1688
1689 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1690 goto bad_packet;
1691
1692 th = tcp_hdr(skb);
1693 iph = ip_hdr(skb);
1694 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1695 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1696 skb->len - th->doff * 4);
1697 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1698 TCP_SKB_CB(skb)->when = 0;
1699 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1700 TCP_SKB_CB(skb)->sacked = 0;
1701
1702 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1703 if (!sk)
1704 goto no_tcp_socket;
1705
1706process:
1707 if (sk->sk_state == TCP_TIME_WAIT)
1708 goto do_time_wait;
1709
1710 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1711 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1712 goto discard_and_relse;
1713 }
1714
1715 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1716 goto discard_and_relse;
1717 nf_reset(skb);
1718
1719 if (sk_filter(sk, skb))
1720 goto discard_and_relse;
1721
1722 skb->dev = NULL;
1723
1724 bh_lock_sock_nested(sk);
1725 ret = 0;
1726 if (!sock_owned_by_user(sk)) {
1727#ifdef CONFIG_NET_DMA
1728 struct tcp_sock *tp = tcp_sk(sk);
1729 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1730 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1731 if (tp->ucopy.dma_chan)
1732 ret = tcp_v4_do_rcv(sk, skb);
1733 else
1734#endif
1735 {
1736 if (!tcp_prequeue(sk, skb))
1737 ret = tcp_v4_do_rcv(sk, skb);
1738 }
1739 } else if (unlikely(sk_add_backlog(sk, skb))) {
1740 bh_unlock_sock(sk);
1741 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1742 goto discard_and_relse;
1743 }
1744 bh_unlock_sock(sk);
1745
1746 sock_put(sk);
1747
1748 return ret;
1749
1750no_tcp_socket:
1751 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1752 goto discard_it;
1753
1754 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1755bad_packet:
1756 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1757 } else {
1758 tcp_v4_send_reset(NULL, skb);
1759 }
1760
1761discard_it:
1762
1763 kfree_skb(skb);
1764 return 0;
1765
1766discard_and_relse:
1767 sock_put(sk);
1768 goto discard_it;
1769
1770do_time_wait:
1771 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1772 inet_twsk_put(inet_twsk(sk));
1773 goto discard_it;
1774 }
1775
1776 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1777 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1778 inet_twsk_put(inet_twsk(sk));
1779 goto discard_it;
1780 }
1781 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1782 case TCP_TW_SYN: {
1783 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1784 &tcp_hashinfo,
1785 iph->daddr, th->dest,
1786 inet_iif(skb));
1787 if (sk2) {
1788 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1789 inet_twsk_put(inet_twsk(sk));
1790 sk = sk2;
1791 goto process;
1792 }
1793
1794 }
1795 case TCP_TW_ACK:
1796 tcp_v4_timewait_ack(sk, skb);
1797 break;
1798 case TCP_TW_RST:
1799 goto no_tcp_socket;
1800 case TCP_TW_SUCCESS:;
1801 }
1802 goto discard_it;
1803}
1804
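/* Return the inet_peer entry for the connection's destination;
 * *release_it tells the caller whether it must drop the reference.
 */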
1805struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1806{
1807 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1808 struct inet_sock *inet = inet_sk(sk);
1809 struct inet_peer *peer;
1810
1811 if (!rt ||
1812 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
1813 peer = inet_getpeer_v4(inet->inet_daddr, 1);
1814 *release_it = true;
1815 } else {
1816 if (!rt->peer)
1817 rt_bind_peer(rt, inet->inet_daddr, 1);
1818 peer = rt->peer;
1819 *release_it = false;
1820 }
1821
1822 return peer;
1823}
1824EXPORT_SYMBOL(tcp_v4_get_peer);
1825
1826void *tcp_v4_tw_get_peer(struct sock *sk)
1827{
1828 const struct inet_timewait_sock *tw = inet_twsk(sk);
1829
1830 return inet_getpeer_v4(tw->tw_daddr, 1);
1831}
1832EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1833
1834static struct timewait_sock_ops tcp_timewait_sock_ops = {
1835 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1836 .twsk_unique = tcp_twsk_unique,
1837 .twsk_destructor= tcp_twsk_destructor,
1838 .twsk_getpeer = tcp_v4_tw_get_peer,
1839};
1840
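/* IPv4 instantiation of the address-family specific operations used by
 * the connection-oriented (TCP) socket code.
 */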
1841const struct inet_connection_sock_af_ops ipv4_specific = {
1842 .queue_xmit = ip_queue_xmit,
1843 .send_check = tcp_v4_send_check,
1844 .rebuild_header = inet_sk_rebuild_header,
1845 .conn_request = tcp_v4_conn_request,
1846 .syn_recv_sock = tcp_v4_syn_recv_sock,
1847 .get_peer = tcp_v4_get_peer,
1848 .net_header_len = sizeof(struct iphdr),
1849 .setsockopt = ip_setsockopt,
1850 .getsockopt = ip_getsockopt,
1851 .addr2sockaddr = inet_csk_addr2sockaddr,
1852 .sockaddr_len = sizeof(struct sockaddr_in),
1853 .bind_conflict = inet_csk_bind_conflict,
1854#ifdef CONFIG_COMPAT
1855 .compat_setsockopt = compat_ip_setsockopt,
1856 .compat_getsockopt = compat_ip_getsockopt,
1857#endif
1858};
1859EXPORT_SYMBOL(ipv4_specific);
1860
1861#ifdef CONFIG_TCP_MD5SIG
1862static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1863 .md5_lookup = tcp_v4_md5_lookup,
1864 .calc_md5_hash = tcp_v4_md5_hash_skb,
1865 .md5_add = tcp_v4_md5_add_func,
1866 .md5_parse = tcp_v4_parse_md5_keys,
1867};
1868#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
1873static int tcp_v4_init_sock(struct sock *sk)
1874{
1875 struct inet_connection_sock *icsk = inet_csk(sk);
1876 struct tcp_sock *tp = tcp_sk(sk);
1877
1878 skb_queue_head_init(&tp->out_of_order_queue);
1879 tcp_init_xmit_timers(sk);
1880 tcp_prequeue_init(tp);
1881
1882 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1883 tp->mdev = TCP_TIMEOUT_INIT;
1884
1885
1886
1887
1888
1889
1890 tp->snd_cwnd = TCP_INIT_CWND;
1891
1892
1893
1894
1895 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1896 tp->snd_cwnd_clamp = ~0;
1897 tp->mss_cache = TCP_MSS_DEFAULT;
1898
1899 tp->reordering = sysctl_tcp_reordering;
1900 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1901
1902 sk->sk_state = TCP_CLOSE;
1903
1904 sk->sk_write_space = sk_stream_write_space;
1905 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1906
1907 icsk->icsk_af_ops = &ipv4_specific;
1908 icsk->icsk_sync_mss = tcp_sync_mss;
1909#ifdef CONFIG_TCP_MD5SIG
1910 tp->af_specific = &tcp_sock_ipv4_specific;
1911#endif
1912
1913
1914 if (sysctl_tcp_cookie_size > 0) {
1915
1916 tp->cookie_values =
1917 kzalloc(sizeof(*tp->cookie_values),
1918 sk->sk_allocation);
1919 if (tp->cookie_values != NULL)
1920 kref_init(&tp->cookie_values->kref);
1921 }
1922
1923
1924
1925
1926 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1927 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1928
1929 local_bh_disable();
1930 sock_update_memcg(sk);
1931 sk_sockets_allocated_inc(sk);
1932 local_bh_enable();
1933
1934 return 0;
1935}
1936
1937void tcp_v4_destroy_sock(struct sock *sk)
1938{
1939 struct tcp_sock *tp = tcp_sk(sk);
1940
1941 tcp_clear_xmit_timers(sk);
1942
1943 tcp_cleanup_congestion_control(sk);
1944
1945
1946 tcp_write_queue_purge(sk);
1947
1948
1949 __skb_queue_purge(&tp->out_of_order_queue);
1950
1951#ifdef CONFIG_TCP_MD5SIG
1952
1953 if (tp->md5sig_info) {
1954 tcp_v4_clear_md5_list(sk);
1955 kfree(tp->md5sig_info);
1956 tp->md5sig_info = NULL;
1957 }
1958#endif
1959
1960#ifdef CONFIG_NET_DMA
1961
1962 __skb_queue_purge(&sk->sk_async_wait_queue);
1963#endif
1964
1965
1966 __skb_queue_purge(&tp->ucopy.prequeue);
1967
1968
1969 if (inet_csk(sk)->icsk_bind_hash)
1970 inet_put_port(sk);
1971
1972
1973
1974
1975 if (sk->sk_sndmsg_page) {
1976 __free_page(sk->sk_sndmsg_page);
1977 sk->sk_sndmsg_page = NULL;
1978 }
1979
1980
1981 if (tp->cookie_values != NULL) {
1982 kref_put(&tp->cookie_values->kref,
1983 tcp_cookie_values_release);
1984 tp->cookie_values = NULL;
1985 }
1986
1987 sk_sockets_allocated_dec(sk);
1988 sock_release_memcg(sk);
1989}
1990EXPORT_SYMBOL(tcp_v4_destroy_sock);
1991
1992#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

1995static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1996{
1997 return hlist_nulls_empty(head) ? NULL :
1998 list_entry(head->first, struct inet_timewait_sock, tw_node);
1999}
2000
2001static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
2002{
2003 return !is_a_nulls(tw->tw_node.next) ?
2004 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
2005}

/*
 * Get next listener socket following cur.  If cur is NULL, get the first
 * socket starting from the bucket given in st->bucket; when st->bucket is
 * zero the very first socket in the hash table is returned.
 */
2012static void *listening_get_next(struct seq_file *seq, void *cur)
2013{
2014 struct inet_connection_sock *icsk;
2015 struct hlist_nulls_node *node;
2016 struct sock *sk = cur;
2017 struct inet_listen_hashbucket *ilb;
2018 struct tcp_iter_state *st = seq->private;
2019 struct net *net = seq_file_net(seq);
2020
2021 if (!sk) {
2022 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2023 spin_lock_bh(&ilb->lock);
2024 sk = sk_nulls_head(&ilb->head);
2025 st->offset = 0;
2026 goto get_sk;
2027 }
2028 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2029 ++st->num;
2030 ++st->offset;
2031
2032 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2033 struct request_sock *req = cur;
2034
2035 icsk = inet_csk(st->syn_wait_sk);
2036 req = req->dl_next;
2037 while (1) {
2038 while (req) {
2039 if (req->rsk_ops->family == st->family) {
2040 cur = req;
2041 goto out;
2042 }
2043 req = req->dl_next;
2044 }
2045 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2046 break;
2047get_req:
2048 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
2049 }
2050 sk = sk_nulls_next(st->syn_wait_sk);
2051 st->state = TCP_SEQ_STATE_LISTENING;
2052 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2053 } else {
2054 icsk = inet_csk(sk);
2055 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2056 if (reqsk_queue_len(&icsk->icsk_accept_queue))
2057 goto start_req;
2058 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2059 sk = sk_nulls_next(sk);
2060 }
2061get_sk:
2062 sk_nulls_for_each_from(sk, node) {
2063 if (!net_eq(sock_net(sk), net))
2064 continue;
2065 if (sk->sk_family == st->family) {
2066 cur = sk;
2067 goto out;
2068 }
2069 icsk = inet_csk(sk);
2070 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2071 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
2072start_req:
2073 st->uid = sock_i_uid(sk);
2074 st->syn_wait_sk = sk;
2075 st->state = TCP_SEQ_STATE_OPENREQ;
2076 st->sbucket = 0;
2077 goto get_req;
2078 }
2079 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2080 }
2081 spin_unlock_bh(&ilb->lock);
2082 st->offset = 0;
2083 if (++st->bucket < INET_LHTABLE_SIZE) {
2084 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2085 spin_lock_bh(&ilb->lock);
2086 sk = sk_nulls_head(&ilb->head);
2087 goto get_sk;
2088 }
2089 cur = NULL;
2090out:
2091 return cur;
2092}
2093
2094static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2095{
2096 struct tcp_iter_state *st = seq->private;
2097 void *rc;
2098
2099 st->bucket = 0;
2100 st->offset = 0;
2101 rc = listening_get_next(seq, NULL);
2102
2103 while (rc && *pos) {
2104 rc = listening_get_next(seq, rc);
2105 --*pos;
2106 }
2107 return rc;
2108}
2109
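/* True when both the established and TIME-WAIT chains of the current
 * ehash bucket are empty.
 */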
2110static inline int empty_bucket(struct tcp_iter_state *st)
2111{
2112 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2113 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2114}

/*
 * Get the first established socket starting from the bucket given in
 * st->bucket.  If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
2120static void *established_get_first(struct seq_file *seq)
2121{
2122 struct tcp_iter_state *st = seq->private;
2123 struct net *net = seq_file_net(seq);
2124 void *rc = NULL;
2125
2126 st->offset = 0;
2127 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2128 struct sock *sk;
2129 struct hlist_nulls_node *node;
2130 struct inet_timewait_sock *tw;
2131 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
2132
2133
2134 if (empty_bucket(st))
2135 continue;
2136
2137 spin_lock_bh(lock);
2138 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
2139 if (sk->sk_family != st->family ||
2140 !net_eq(sock_net(sk), net)) {
2141 continue;
2142 }
2143 rc = sk;
2144 goto out;
2145 }
2146 st->state = TCP_SEQ_STATE_TIME_WAIT;
2147 inet_twsk_for_each(tw, node,
2148 &tcp_hashinfo.ehash[st->bucket].twchain) {
2149 if (tw->tw_family != st->family ||
2150 !net_eq(twsk_net(tw), net)) {
2151 continue;
2152 }
2153 rc = tw;
2154 goto out;
2155 }
2156 spin_unlock_bh(lock);
2157 st->state = TCP_SEQ_STATE_ESTABLISHED;
2158 }
2159out:
2160 return rc;
2161}
2162
2163static void *established_get_next(struct seq_file *seq, void *cur)
2164{
2165 struct sock *sk = cur;
2166 struct inet_timewait_sock *tw;
2167 struct hlist_nulls_node *node;
2168 struct tcp_iter_state *st = seq->private;
2169 struct net *net = seq_file_net(seq);
2170
2171 ++st->num;
2172 ++st->offset;
2173
2174 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2175 tw = cur;
2176 tw = tw_next(tw);
2177get_tw:
2178 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
2179 tw = tw_next(tw);
2180 }
2181 if (tw) {
2182 cur = tw;
2183 goto out;
2184 }
2185 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2186 st->state = TCP_SEQ_STATE_ESTABLISHED;
2187
2188
2189 st->offset = 0;
2190 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2191 empty_bucket(st))
2192 ;
2193 if (st->bucket > tcp_hashinfo.ehash_mask)
2194 return NULL;
2195
2196 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2197 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
2198 } else
2199 sk = sk_nulls_next(sk);
2200
2201 sk_nulls_for_each_from(sk, node) {
2202 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
2203 goto found;
2204 }
2205
2206 st->state = TCP_SEQ_STATE_TIME_WAIT;
2207 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
2208 goto get_tw;
2209found:
2210 cur = sk;
2211out:
2212 return cur;
2213}
2214
2215static void *established_get_idx(struct seq_file *seq, loff_t pos)
2216{
2217 struct tcp_iter_state *st = seq->private;
2218 void *rc;
2219
2220 st->bucket = 0;
2221 rc = established_get_first(seq);
2222
2223 while (rc && pos) {
2224 rc = established_get_next(seq, rc);
2225 --pos;
2226 }
2227 return rc;
2228}
2229
2230static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2231{
2232 void *rc;
2233 struct tcp_iter_state *st = seq->private;
2234
2235 st->state = TCP_SEQ_STATE_LISTENING;
2236 rc = listening_get_idx(seq, &pos);
2237
2238 if (!rc) {
2239 st->state = TCP_SEQ_STATE_ESTABLISHED;
2240 rc = established_get_idx(seq, pos);
2241 }
2242
2243 return rc;
2244}
2245
2246static void *tcp_seek_last_pos(struct seq_file *seq)
2247{
2248 struct tcp_iter_state *st = seq->private;
2249 int offset = st->offset;
2250 int orig_num = st->num;
2251 void *rc = NULL;
2252
2253 switch (st->state) {
2254 case TCP_SEQ_STATE_OPENREQ:
2255 case TCP_SEQ_STATE_LISTENING:
2256 if (st->bucket >= INET_LHTABLE_SIZE)
2257 break;
2258 st->state = TCP_SEQ_STATE_LISTENING;
2259 rc = listening_get_next(seq, NULL);
2260 while (offset-- && rc)
2261 rc = listening_get_next(seq, rc);
2262 if (rc)
2263 break;
2264 st->bucket = 0;
2265
2266 case TCP_SEQ_STATE_ESTABLISHED:
2267 case TCP_SEQ_STATE_TIME_WAIT:
2268 st->state = TCP_SEQ_STATE_ESTABLISHED;
2269 if (st->bucket > tcp_hashinfo.ehash_mask)
2270 break;
2271 rc = established_get_first(seq);
2272 while (offset-- && rc)
2273 rc = established_get_next(seq, rc);
2274 }
2275
2276 st->num = orig_num;
2277
2278 return rc;
2279}
2280
2281static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2282{
2283 struct tcp_iter_state *st = seq->private;
2284 void *rc;
2285
2286 if (*pos && *pos == st->last_pos) {
2287 rc = tcp_seek_last_pos(seq);
2288 if (rc)
2289 goto out;
2290 }
2291
2292 st->state = TCP_SEQ_STATE_LISTENING;
2293 st->num = 0;
2294 st->bucket = 0;
2295 st->offset = 0;
2296 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2297
2298out:
2299 st->last_pos = *pos;
2300 return rc;
2301}
2302
2303static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2304{
2305 struct tcp_iter_state *st = seq->private;
2306 void *rc = NULL;
2307
2308 if (v == SEQ_START_TOKEN) {
2309 rc = tcp_get_idx(seq, 0);
2310 goto out;
2311 }
2312
2313 switch (st->state) {
2314 case TCP_SEQ_STATE_OPENREQ:
2315 case TCP_SEQ_STATE_LISTENING:
2316 rc = listening_get_next(seq, v);
2317 if (!rc) {
2318 st->state = TCP_SEQ_STATE_ESTABLISHED;
2319 st->bucket = 0;
2320 st->offset = 0;
2321 rc = established_get_first(seq);
2322 }
2323 break;
2324 case TCP_SEQ_STATE_ESTABLISHED:
2325 case TCP_SEQ_STATE_TIME_WAIT:
2326 rc = established_get_next(seq, v);
2327 break;
2328 }
2329out:
2330 ++*pos;
2331 st->last_pos = *pos;
2332 return rc;
2333}
2334
2335static void tcp_seq_stop(struct seq_file *seq, void *v)
2336{
2337 struct tcp_iter_state *st = seq->private;
2338
2339 switch (st->state) {
2340 case TCP_SEQ_STATE_OPENREQ:
2341 if (v) {
2342 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2343 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2344 }
2345 case TCP_SEQ_STATE_LISTENING:
2346 if (v != SEQ_START_TOKEN)
2347 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2348 break;
2349 case TCP_SEQ_STATE_TIME_WAIT:
2350 case TCP_SEQ_STATE_ESTABLISHED:
2351 if (v)
2352 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2353 break;
2354 }
2355}
2356
2357int tcp_seq_open(struct inode *inode, struct file *file)
2358{
2359 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
2360 struct tcp_iter_state *s;
2361 int err;
2362
2363 err = seq_open_net(inode, file, &afinfo->seq_ops,
2364 sizeof(struct tcp_iter_state));
2365 if (err < 0)
2366 return err;
2367
2368 s = ((struct seq_file *)file->private_data)->private;
2369 s->family = afinfo->family;
2370 s->last_pos = 0;
2371 return 0;
2372}
2373EXPORT_SYMBOL(tcp_seq_open);
2374
2375int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2376{
2377 int rc = 0;
2378 struct proc_dir_entry *p;
2379
2380 afinfo->seq_ops.start = tcp_seq_start;
2381 afinfo->seq_ops.next = tcp_seq_next;
2382 afinfo->seq_ops.stop = tcp_seq_stop;
2383
2384 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2385 afinfo->seq_fops, afinfo);
2386 if (!p)
2387 rc = -ENOMEM;
2388 return rc;
2389}
2390EXPORT_SYMBOL(tcp_proc_register);
2391
2392void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2393{
2394 proc_net_remove(net, afinfo->name);
2395}
2396EXPORT_SYMBOL(tcp_proc_unregister);
2397
static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, int uid, int *len)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0,  /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req,
		len);
}

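/*
 * Format one full TCP socket (listening or connected) as a /proc/net/tcp
 * line.  The timer fields encode which timer is pending (retransmit,
 * zero-window probe or keepalive) and when it expires.
 */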
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active = 2;
		timer_expires = sk->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket,
		 * tp->rcv_nxt - tp->copied_seq can transiently be negative;
		 * clamp it to zero for reporting.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sk),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
		len);
}

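/*
 * Format one TIME_WAIT socket.  Only the addresses, the substate and the
 * remaining time-wait lifetime are meaningful; the other columns are
 * printed as zero.
 */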
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i, int *len)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = tw->tw_daddr;
	src = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw, len);
}

#define TMPSZ 150

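/*
 * seq_file ->show: emit the header row for the SEQ_START_TOKEN slot,
 * otherwise dispatch on the iterator state to print one socket, then pad
 * the line to TMPSZ - 1 characters so every row has a fixed width.
 */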
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif

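/*
 * GRO receive hook for IPv4/TCP: verify (or give up on) the TCP checksum
 * before handing the segment to the generic tcp_gro_receive() aggregator.
 * Packets whose checksum cannot be validated are flushed instead of merged.
 */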
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

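/*
 * GRO complete hook: recompute the TCP pseudo-header checksum for the
 * aggregated packet and tag it SKB_GSO_TCPV4 before the generic
 * tcp_gro_complete() finalises the merged header.
 */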
int tcp4_gro_complete(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}

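/* The IPv4 TCP protocol descriptor exported to the socket layer. */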
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

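/*
 * Per-network-namespace setup: each namespace gets its own kernel control
 * socket, used when TCP has to send resets or ACKs on behalf of connections
 * it does not own.  The batched exit purges that namespace's time-wait
 * sockets.
 */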
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

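/*
 * Boot-time initialisation: set up the global TCP hash tables and register
 * the per-namespace operations.  Failure here is fatal, hence the panic().
 */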
void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}