1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/mm.h>
22#include <linux/module.h>
23#include <linux/sysctl.h>
24#include <linux/workqueue.h>
25#include <net/tcp.h>
26#include <net/inet_common.h>
27#include <net/xfrm.h>
28
29#ifdef CONFIG_SYSCTL
30#define SYNC_INIT 0
31#else
32#define SYNC_INIT 1
33#endif
34
35int sysctl_tcp_syncookies __read_mostly = SYNC_INIT;
36EXPORT_SYMBOL(sysctl_tcp_syncookies);
37
38int sysctl_tcp_abort_on_overflow __read_mostly;
39
40struct inet_timewait_death_row tcp_death_row = {
41 .sysctl_max_tw_buckets = NR_FILE * 2,
42 .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
43 .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
44 .hashinfo = &tcp_hashinfo,
45 .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
46 (unsigned long)&tcp_death_row),
47 .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
48 inet_twdr_twkill_work),
49
50
51 .twcal_hand = -1,
52 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
53 (unsigned long)&tcp_death_row),
54};
55
56EXPORT_SYMBOL_GPL(tcp_death_row);
57
58static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
59{
60 if (seq == s_win)
61 return 1;
62 if (after(end_seq, s_win) && before(seq, e_win))
63 return 1;
64 return (seq == e_win && seq == end_seq);
65}
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95enum tcp_tw_status
96tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
97 const struct tcphdr *th)
98{
99 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
100 struct tcp_options_received tmp_opt;
101 int paws_reject = 0;
102
103 tmp_opt.saw_tstamp = 0;
104 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
105 tcp_parse_options(skb, &tmp_opt, 0);
106
107 if (tmp_opt.saw_tstamp) {
108 tmp_opt.ts_recent = tcptw->tw_ts_recent;
109 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
110 paws_reject = tcp_paws_check(&tmp_opt, th->rst);
111 }
112 }
113
114 if (tw->tw_substate == TCP_FIN_WAIT2) {
115
116
117
118 if (paws_reject ||
119 !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
120 tcptw->tw_rcv_nxt,
121 tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
122 return TCP_TW_ACK;
123
124 if (th->rst)
125 goto kill;
126
127 if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
128 goto kill_with_rst;
129
130
131 if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
132 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
133 inet_twsk_put(tw);
134 return TCP_TW_SUCCESS;
135 }
136
137
138
139
140 if (!th->fin ||
141 TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
142kill_with_rst:
143 inet_twsk_deschedule(tw, &tcp_death_row);
144 inet_twsk_put(tw);
145 return TCP_TW_RST;
146 }
147
148
149 tw->tw_substate = TCP_TIME_WAIT;
150 tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
151 if (tmp_opt.saw_tstamp) {
152 tcptw->tw_ts_recent_stamp = get_seconds();
153 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
154 }
155
156
157
158
159
160
161 if (tw->tw_family == AF_INET &&
162 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
163 tcp_v4_tw_remember_stamp(tw))
164 inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
165 TCP_TIMEWAIT_LEN);
166 else
167 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
168 TCP_TIMEWAIT_LEN);
169 return TCP_TW_ACK;
170 }
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189 if (!paws_reject &&
190 (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
191 (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
192
193
194 if (th->rst) {
195
196
197
198
199 if (sysctl_tcp_rfc1337 == 0) {
200kill:
201 inet_twsk_deschedule(tw, &tcp_death_row);
202 inet_twsk_put(tw);
203 return TCP_TW_SUCCESS;
204 }
205 }
206 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
207 TCP_TIMEWAIT_LEN);
208
209 if (tmp_opt.saw_tstamp) {
210 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
211 tcptw->tw_ts_recent_stamp = get_seconds();
212 }
213
214 inet_twsk_put(tw);
215 return TCP_TW_SUCCESS;
216 }
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235 if (th->syn && !th->rst && !th->ack && !paws_reject &&
236 (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
237 (tmp_opt.saw_tstamp &&
238 (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) {
239 u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
240 if (isn == 0)
241 isn++;
242 TCP_SKB_CB(skb)->when = isn;
243 return TCP_TW_SYN;
244 }
245
246 if (paws_reject)
247 NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
248
249 if (!th->rst) {
250
251
252
253
254
255
256 if (paws_reject || th->ack)
257 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
258 TCP_TIMEWAIT_LEN);
259
260
261
262
263 return TCP_TW_ACK;
264 }
265 inet_twsk_put(tw);
266 return TCP_TW_SUCCESS;
267}
268
269
270
271
272void tcp_time_wait(struct sock *sk, int state, int timeo)
273{
274 struct inet_timewait_sock *tw = NULL;
275 const struct inet_connection_sock *icsk = inet_csk(sk);
276 const struct tcp_sock *tp = tcp_sk(sk);
277 int recycle_ok = 0;
278
279 if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
280 recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
281
282 if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
283 tw = inet_twsk_alloc(sk, state);
284
285 if (tw != NULL) {
286 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
287 const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
288
289 tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale;
290 tcptw->tw_rcv_nxt = tp->rcv_nxt;
291 tcptw->tw_snd_nxt = tp->snd_nxt;
292 tcptw->tw_rcv_wnd = tcp_receive_window(tp);
293 tcptw->tw_ts_recent = tp->rx_opt.ts_recent;
294 tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
295
296#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
297 if (tw->tw_family == PF_INET6) {
298 struct ipv6_pinfo *np = inet6_sk(sk);
299 struct inet6_timewait_sock *tw6;
300
301 tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
302 tw6 = inet6_twsk((struct sock *)tw);
303 ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
304 ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
305 tw->tw_ipv6only = np->ipv6only;
306 }
307#endif
308
309#ifdef CONFIG_TCP_MD5SIG
310
311
312
313
314
315
316 do {
317 struct tcp_md5sig_key *key;
318 memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key));
319 tcptw->tw_md5_keylen = 0;
320 key = tp->af_specific->md5_lookup(sk, sk);
321 if (key != NULL) {
322 memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
323 tcptw->tw_md5_keylen = key->keylen;
324 if (tcp_alloc_md5sig_pool() == NULL)
325 BUG();
326 }
327 } while (0);
328#endif
329
330
331 __inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
332
333
334 if (timeo < rto)
335 timeo = rto;
336
337 if (recycle_ok) {
338 tw->tw_timeout = rto;
339 } else {
340 tw->tw_timeout = TCP_TIMEWAIT_LEN;
341 if (state == TCP_TIME_WAIT)
342 timeo = TCP_TIMEWAIT_LEN;
343 }
344
345 inet_twsk_schedule(tw, &tcp_death_row, timeo,
346 TCP_TIMEWAIT_LEN);
347 inet_twsk_put(tw);
348 } else {
349
350
351
352
353 LIMIT_NETDEBUG(KERN_INFO "TCP: time wait bucket table overflow\n");
354 }
355
356 tcp_update_metrics(sk);
357 tcp_done(sk);
358}
359
/*
 * Destructor for a TCP time-wait socket.  With CONFIG_TCP_MD5SIG, a socket
 * that carries an MD5 key (non-zero tw_md5_keylen) releases one reference on
 * the md5sig pool; without that option this function does nothing.
 */
void tcp_twsk_destructor(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	if (tcp_twsk(sk)->tw_md5_keylen != 0)
		tcp_free_md5sig_pool();
#endif
}

EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
370
371static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
372 struct request_sock *req)
373{
374 tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0;
375}
376
377
378
379
380
381
382
383struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
384{
385 struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
386
387 if (newsk != NULL) {
388 const struct inet_request_sock *ireq = inet_rsk(req);
389 struct tcp_request_sock *treq = tcp_rsk(req);
390 struct inet_connection_sock *newicsk = inet_csk(newsk);
391 struct tcp_sock *newtp;
392
393
394 newtp = tcp_sk(newsk);
395 newtp->pred_flags = 0;
396 newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
397 newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
398
399 tcp_prequeue_init(newtp);
400
401 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
402
403 newtp->srtt = 0;
404 newtp->mdev = TCP_TIMEOUT_INIT;
405 newicsk->icsk_rto = TCP_TIMEOUT_INIT;
406
407 newtp->packets_out = 0;
408 newtp->retrans_out = 0;
409 newtp->sacked_out = 0;
410 newtp->fackets_out = 0;
411 newtp->snd_ssthresh = 0x7fffffff;
412
413
414
415
416
417
418 newtp->snd_cwnd = 2;
419 newtp->snd_cwnd_cnt = 0;
420 newtp->bytes_acked = 0;
421
422 newtp->frto_counter = 0;
423 newtp->frto_highmark = 0;
424
425 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
426
427 tcp_set_ca_state(newsk, TCP_CA_Open);
428 tcp_init_xmit_timers(newsk);
429 skb_queue_head_init(&newtp->out_of_order_queue);
430 newtp->write_seq = treq->snt_isn + 1;
431 newtp->pushed_seq = newtp->write_seq;
432
433 newtp->rx_opt.saw_tstamp = 0;
434
435 newtp->rx_opt.dsack = 0;
436 newtp->rx_opt.eff_sacks = 0;
437
438 newtp->rx_opt.num_sacks = 0;
439 newtp->urg_data = 0;
440
441 if (sock_flag(newsk, SOCK_KEEPOPEN))
442 inet_csk_reset_keepalive_timer(newsk,
443 keepalive_time_when(newtp));
444
445 newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
446 if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
447 if (sysctl_tcp_fack)
448 tcp_enable_fack(newtp);
449 }
450 newtp->window_clamp = req->window_clamp;
451 newtp->rcv_ssthresh = req->rcv_wnd;
452 newtp->rcv_wnd = req->rcv_wnd;
453 newtp->rx_opt.wscale_ok = ireq->wscale_ok;
454 if (newtp->rx_opt.wscale_ok) {
455 newtp->rx_opt.snd_wscale = ireq->snd_wscale;
456 newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
457 } else {
458 newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
459 newtp->window_clamp = min(newtp->window_clamp, 65535U);
460 }
461 newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
462 newtp->rx_opt.snd_wscale);
463 newtp->max_window = newtp->snd_wnd;
464
465 if (newtp->rx_opt.tstamp_ok) {
466 newtp->rx_opt.ts_recent = req->ts_recent;
467 newtp->rx_opt.ts_recent_stamp = get_seconds();
468 newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
469 } else {
470 newtp->rx_opt.ts_recent_stamp = 0;
471 newtp->tcp_header_len = sizeof(struct tcphdr);
472 }
473#ifdef CONFIG_TCP_MD5SIG
474 newtp->md5sig_info = NULL;
475 if (newtp->af_specific->md5_lookup(sk, newsk))
476 newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
477#endif
478 if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
479 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
480 newtp->rx_opt.mss_clamp = req->mss;
481 TCP_ECN_openreq_child(newtp, req);
482
483 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
484 }
485 return newsk;
486}
487
488
489
490
491
492
493struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
494 struct request_sock *req,
495 struct request_sock **prev)
496{
497 const struct tcphdr *th = tcp_hdr(skb);
498 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
499 int paws_reject = 0;
500 struct tcp_options_received tmp_opt;
501 struct sock *child;
502
503 tmp_opt.saw_tstamp = 0;
504 if (th->doff > (sizeof(struct tcphdr)>>2)) {
505 tcp_parse_options(skb, &tmp_opt, 0);
506
507 if (tmp_opt.saw_tstamp) {
508 tmp_opt.ts_recent = req->ts_recent;
509
510
511
512
513 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
514 paws_reject = tcp_paws_check(&tmp_opt, th->rst);
515 }
516 }
517
518
519 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
520 flg == TCP_FLAG_SYN &&
521 !paws_reject) {
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539 req->rsk_ops->rtx_syn_ack(sk, req);
540 return NULL;
541 }
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597 if ((flg & TCP_FLAG_ACK) &&
598 (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
599 return sk;
600
601
602
603
604
605
606
607
608 if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
609 tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
610
611 if (!(flg & TCP_FLAG_RST))
612 req->rsk_ops->send_ack(sk, skb, req);
613 if (paws_reject)
614 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
615 return NULL;
616 }
617
618
619
620 if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
621 req->ts_recent = tmp_opt.rcv_tsval;
622
623 if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
624
625
626 flg &= ~TCP_FLAG_SYN;
627 }
628
629
630
631
632 if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
633 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
634 goto embryonic_reset;
635 }
636
637
638
639
640 if (!(flg & TCP_FLAG_ACK))
641 return NULL;
642
643
644 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
645 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
646 inet_rsk(req)->acked = 1;
647 return NULL;
648 }
649
650
651
652
653
654
655
656 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
657 if (child == NULL)
658 goto listen_overflow;
659#ifdef CONFIG_TCP_MD5SIG
660 else {
661
662 struct tcp_md5sig_key *key;
663 struct tcp_sock *tp = tcp_sk(sk);
664 key = tp->af_specific->md5_lookup(sk, child);
665 if (key != NULL) {
666
667
668
669
670
671 char *newkey = kmemdup(key->key, key->keylen,
672 GFP_ATOMIC);
673 if (newkey) {
674 if (!tcp_alloc_md5sig_pool())
675 BUG();
676 tp->af_specific->md5_add(child, child, newkey,
677 key->keylen);
678 }
679 }
680 }
681#endif
682
683 inet_csk_reqsk_queue_unlink(sk, req, prev);
684 inet_csk_reqsk_queue_removed(sk, req);
685
686 inet_csk_reqsk_queue_add(sk, req, child);
687 return child;
688
689listen_overflow:
690 if (!sysctl_tcp_abort_on_overflow) {
691 inet_rsk(req)->acked = 1;
692 return NULL;
693 }
694
695embryonic_reset:
696 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
697 if (!(flg & TCP_FLAG_RST))
698 req->rsk_ops->send_reset(sk, skb);
699
700 inet_csk_reqsk_queue_drop(sk, req, prev);
701 return NULL;
702}
703
704
705
706
707
708
709
710int tcp_child_process(struct sock *parent, struct sock *child,
711 struct sk_buff *skb)
712{
713 int ret = 0;
714 int state = child->sk_state;
715
716 if (!sock_owned_by_user(child)) {
717 ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
718 skb->len);
719
720 if (state == TCP_SYN_RECV && child->sk_state != state)
721 parent->sk_data_ready(parent, 0);
722 } else {
723
724
725
726
727 sk_add_backlog(child, skb);
728 }
729
730 bh_unlock_sock(child);
731 sock_put(child);
732 return ret;
733}
734
/* Entry points of this file that are exported to modules. */
EXPORT_SYMBOL(tcp_check_req);
EXPORT_SYMBOL(tcp_child_process);
EXPORT_SYMBOL(tcp_create_openreq_child);
EXPORT_SYMBOL(tcp_timewait_state_process);
739