1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23#include <net/tcp.h>
24
25int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
26int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
27int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
28int sysctl_tcp_retries1 = TCP_RETR1;
29int sysctl_tcp_retries2 = TCP_RETR2;
30
31static void tcp_sltimer_handler(unsigned long);
32static void tcp_syn_recv_timer(unsigned long);
33static void tcp_keepalive(unsigned long data);
34static void tcp_bucketgc(unsigned long);
35static void tcp_twkill(unsigned long);
36
37struct timer_list tcp_slow_timer = {
38 NULL, NULL,
39 0, 0,
40 tcp_sltimer_handler,
41};
42
43
44struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
45 {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},
46 {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive},
47 {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill},
48 {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc}
49};
50
51const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
52
53
54
55
56
57
58
59void tcp_init_xmit_timers(struct sock *sk)
60{
61 init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer);
62 sk->tp_pinfo.af_tcp.retransmit_timer.function=&tcp_retransmit_timer;
63 sk->tp_pinfo.af_tcp.retransmit_timer.data = (unsigned long) sk;
64
65 init_timer(&sk->tp_pinfo.af_tcp.delack_timer);
66 sk->tp_pinfo.af_tcp.delack_timer.function=&tcp_delack_timer;
67 sk->tp_pinfo.af_tcp.delack_timer.data = (unsigned long) sk;
68
69 init_timer(&sk->tp_pinfo.af_tcp.probe_timer);
70 sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer;
71 sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk;
72}
73
74
75
76
77
78void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
79{
80 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
81
82 switch (what) {
83 case TIME_RETRANS:
84
85
86
87
88
89 if(tp->probe_timer.prev)
90 del_timer(&tp->probe_timer);
91 mod_timer(&tp->retransmit_timer, jiffies+when);
92 break;
93
94 case TIME_DACK:
95 mod_timer(&tp->delack_timer, jiffies+when);
96 break;
97
98 case TIME_PROBE0:
99 mod_timer(&tp->probe_timer, jiffies+when);
100 break;
101
102 case TIME_WRITE:
103 printk(KERN_DEBUG "bug: tcp_reset_xmit_timer TIME_WRITE\n");
104 break;
105
106 default:
107 printk(KERN_DEBUG "bug: unknown timer value\n");
108 };
109}
110
111void tcp_clear_xmit_timers(struct sock *sk)
112{
113 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
114
115 if(tp->retransmit_timer.prev)
116 del_timer(&tp->retransmit_timer);
117 if(tp->delack_timer.prev)
118 del_timer(&tp->delack_timer);
119 if(tp->probe_timer.prev)
120 del_timer(&tp->probe_timer);
121}
122
123static int tcp_write_err(struct sock *sk, int force)
124{
125 sk->err = sk->err_soft ? sk->err_soft : ETIMEDOUT;
126 sk->error_report(sk);
127
128 tcp_clear_xmit_timers(sk);
129
130
131 if (!force && ((1<<sk->state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING))) {
132 tcp_time_wait(sk);
133 } else {
134
135 tcp_set_state(sk, TCP_CLOSE);
136 return 0;
137 }
138 return 1;
139}
140
141
142static int tcp_write_timeout(struct sock *sk)
143{
144 struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
145
146
147 if ((sk->state == TCP_ESTABLISHED &&
148 tp->retransmits && (tp->retransmits % TCP_QUICK_TRIES) == 0) ||
149 (sk->state != TCP_ESTABLISHED && tp->retransmits > sysctl_tcp_retries1)) {
150 dst_negative_advice(&sk->dst_cache);
151 }
152
153
154 if(tp->retransmits > sysctl_tcp_syn_retries && sk->state==TCP_SYN_SENT) {
155 tcp_write_err(sk, 1);
156
157 return 0;
158 }
159
160
161 if (tp->retransmits > sysctl_tcp_retries2)
162 return tcp_write_err(sk, 0);
163
164 return 1;
165}
166
167void tcp_delack_timer(unsigned long data)
168{
169 struct sock *sk = (struct sock*)data;
170
171 if(!sk->zapped &&
172 sk->tp_pinfo.af_tcp.delayed_acks &&
173 sk->state != TCP_CLOSE) {
174
175 if (!atomic_read(&sk->sock_readers))
176 tcp_send_ack(sk);
177 else
178 tcp_send_delayed_ack(&(sk->tp_pinfo.af_tcp), HZ/10);
179 }
180}
181
182void tcp_probe_timer(unsigned long data)
183{
184 struct sock *sk = (struct sock*)data;
185 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
186
187 if(sk->zapped)
188 return;
189
190 if (atomic_read(&sk->sock_readers)) {
191
192 tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ/5);
193 return;
194 }
195
196
197
198
199
200
201 if (tp->probes_out > sysctl_tcp_retries2) {
202 if(sk->err_soft)
203 sk->err = sk->err_soft;
204 else
205 sk->err = ETIMEDOUT;
206 sk->error_report(sk);
207
208 if ((1<<sk->state) & (TCPF_FIN_WAIT1|TCPF_FIN_WAIT2|TCPF_CLOSING)) {
209
210 tcp_time_wait(sk);
211 } else {
212
213 tcp_set_state(sk, TCP_CLOSE);
214 }
215 } else {
216
217 tcp_send_probe0(sk);
218 }
219}
220
221static __inline__ int tcp_keepopen_proc(struct sock *sk)
222{
223 int res = 0;
224
225 if ((1<<sk->state) & (TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_FIN_WAIT2)) {
226 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
227 __u32 elapsed = jiffies - tp->rcv_tstamp;
228
229 if (elapsed >= sysctl_tcp_keepalive_time) {
230 if (tp->probes_out > sysctl_tcp_keepalive_probes) {
231 if(sk->err_soft)
232 sk->err = sk->err_soft;
233 else
234 sk->err = ETIMEDOUT;
235
236 tcp_set_state(sk, TCP_CLOSE);
237 sk->shutdown = SHUTDOWN_MASK;
238 if (!sk->dead)
239 sk->state_change(sk);
240 } else {
241 tp->probes_out++;
242 tp->pending = TIME_KEEPOPEN;
243 tcp_write_wakeup(sk);
244 res = 1;
245 }
246 }
247 }
248 return res;
249}
250
251
252static void tcp_bucketgc(unsigned long data)
253{
254 int i, reaped = 0;;
255
256 for(i = 0; i < TCP_BHTABLE_SIZE; i++) {
257 struct tcp_bind_bucket *tb = tcp_bound_hash[i];
258
259 while(tb) {
260 struct tcp_bind_bucket *next = tb->next;
261
262 if((tb->owners == NULL) &&
263 !(tb->flags & TCPB_FLAG_LOCKED)) {
264 reaped++;
265
266
267 if(tb->next)
268 tb->next->pprev = tb->pprev;
269 *tb->pprev = tb->next;
270
271
272 kmem_cache_free(tcp_bucket_cachep, tb);
273 }
274 tb = next;
275 }
276 }
277 if(reaped != 0) {
278 struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
279
280
281 atomic_sub(reaped, &slt->count);
282 }
283}
284
285
286int tcp_tw_death_row_slot = 0;
287static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] =
288 { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
289
290extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
291
292static void tcp_twkill(unsigned long data)
293{
294 struct tcp_tw_bucket *tw;
295 int killed = 0;
296
297 tw = tcp_tw_death_row[tcp_tw_death_row_slot];
298 tcp_tw_death_row[tcp_tw_death_row_slot] = NULL;
299 while(tw != NULL) {
300 struct tcp_tw_bucket *next = tw->next_death;
301
302 tcp_timewait_kill(tw);
303 killed++;
304 tw = next;
305 }
306 if(killed != 0) {
307 struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
308 atomic_sub(killed, &slt->count);
309 }
310 tcp_tw_death_row_slot =
311 ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
312}
313
314
315
316
317void tcp_tw_schedule(struct tcp_tw_bucket *tw)
318{
319 int slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1);
320
321 tw->death_slot = slot;
322 tw->next_death = tcp_tw_death_row[slot];
323 tcp_tw_death_row[slot] = tw;
324 tcp_inc_slow_timer(TCP_SLT_TWKILL);
325}
326
327
328void tcp_tw_reschedule(struct tcp_tw_bucket *tw)
329{
330 struct tcp_tw_bucket *walk;
331 int slot = tw->death_slot;
332
333 walk = tcp_tw_death_row[slot];
334 if(walk == tw) {
335 tcp_tw_death_row[slot] = tw->next_death;
336 } else {
337 while(walk->next_death != tw)
338 walk = walk->next_death;
339 walk->next_death = tw->next_death;
340 }
341 slot = (tcp_tw_death_row_slot - 1) & (TCP_TWKILL_SLOTS - 1);
342 tw->death_slot = slot;
343 tw->next_death = tcp_tw_death_row[slot];
344 tcp_tw_death_row[slot] = tw;
345
346}
347
348
349void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
350{
351 struct tcp_tw_bucket *walk;
352 int slot = tw->death_slot;
353
354 walk = tcp_tw_death_row[slot];
355 if(walk == tw) {
356 tcp_tw_death_row[slot] = tw->next_death;
357 } else {
358 while(walk->next_death != tw)
359 walk = walk->next_death;
360 walk->next_death = tw->next_death;
361 }
362 tcp_dec_slow_timer(TCP_SLT_TWKILL);
363}
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
/* Default for the number of keepalive probes tcp_keepalive() issues per run. */
#define MAX_KA_PROBES	5

/* Run-time value of that per-run probe budget. */
int sysctl_tcp_max_ka_probes = MAX_KA_PROBES;
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397static void tcp_keepalive(unsigned long data)
398{
399 static int chain_start = 0;
400 int count = 0;
401 int i;
402
403 for(i = chain_start; i < (chain_start + ((TCP_HTABLE_SIZE/2) >> 2)); i++) {
404 struct sock *sk = tcp_established_hash[i];
405 while(sk) {
406 if(!atomic_read(&sk->sock_readers) && sk->keepopen) {
407 count += tcp_keepopen_proc(sk);
408 if(count == sysctl_tcp_max_ka_probes)
409 goto out;
410 }
411 sk = sk->next;
412 }
413 }
414out:
415 chain_start = ((chain_start + ((TCP_HTABLE_SIZE/2)>>2)) &
416 ((TCP_HTABLE_SIZE/2) - 1));
417}
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432void tcp_retransmit_timer(unsigned long data)
433{
434 struct sock *sk = (struct sock*)data;
435 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
436
437
438 if(sk->zapped) {
439 tcp_clear_xmit_timer(sk, TIME_RETRANS);
440 return;
441 }
442
443 if (atomic_read(&sk->sock_readers)) {
444
445 tcp_reset_xmit_timer(sk, TIME_RETRANS, HZ/20);
446 return;
447 }
448
449
450 tcp_clear_xmit_timer(sk, TIME_DACK);
451
452
453
454
455
456 if(tp->sack_ok) {
457 struct sk_buff *skb = skb_peek(&sk->write_queue);
458
459 while((skb != NULL) &&
460 (skb != tp->send_head) &&
461 (skb != (struct sk_buff *)&sk->write_queue)) {
462 TCP_SKB_CB(skb)->sacked &=
463 ~(TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS);
464 skb = skb->next;
465 }
466 }
467
468
469 tp->retrans_head = NULL;
470 tp->rexmt_done = 0;
471 tp->fackets_out = 0;
472 tp->retrans_out = 0;
473 if (tp->retransmits == 0) {
474
475
476
477
478
479
480
481 tp->snd_ssthresh = max(min(tp->snd_wnd, tp->snd_cwnd) >> 1, 2);
482 tp->snd_cwnd_cnt = 0;
483 tp->snd_cwnd = 1;
484 }
485
486 tp->retransmits++;
487
488 tp->dup_acks = 0;
489 tp->high_seq = tp->snd_nxt;
490 tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507 tp->backoff++;
508 tp->rto = min(tp->rto << 1, 120*HZ);
509 tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
510
511 tcp_write_timeout(sk);
512}
513
514
515
516
517
518
519static void tcp_syn_recv_timer(unsigned long data)
520{
521 struct sock *sk;
522 unsigned long now = jiffies;
523 int i;
524
525 for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
526 sk = tcp_listening_hash[i];
527
528 while(sk) {
529 struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
530
531
532 if (!atomic_read(&sk->sock_readers) && tp->syn_wait_queue) {
533 struct open_request *prev = (struct open_request *)(&tp->syn_wait_queue);
534 struct open_request *req = tp->syn_wait_queue;
535 do {
536 struct open_request *conn;
537
538 conn = req;
539 req = req->dl_next;
540
541 if (conn->sk) {
542 prev = conn;
543 continue;
544 }
545
546 if ((long)(now - conn->expires) <= 0)
547 break;
548
549
550 tcp_synq_unlink(tp, conn, prev);
551 if (conn->retrans >= sysctl_tcp_retries1) {
552#ifdef TCP_DEBUG
553 printk(KERN_DEBUG "syn_recv: "
554 "too many retransmits\n");
555#endif
556 (*conn->class->destructor)(conn);
557 tcp_dec_slow_timer(TCP_SLT_SYNACK);
558 tp->syn_backlog--;
559 tcp_openreq_free(conn);
560
561 if (!tp->syn_wait_queue)
562 break;
563 } else {
564 __u32 timeo;
565 struct open_request *op;
566
567 (*conn->class->rtx_syn_ack)(sk, conn);
568
569 conn->retrans++;
570#ifdef TCP_DEBUG
571 printk(KERN_DEBUG "syn_ack rtx %d\n",
572 conn->retrans);
573#endif
574 timeo = min((TCP_TIMEOUT_INIT
575 << conn->retrans),
576 120*HZ);
577 conn->expires = now + timeo;
578 op = prev->dl_next;
579 tcp_synq_queue(tp, conn);
580 if (op != prev->dl_next)
581 prev = prev->dl_next;
582 }
583
584 } while (req);
585 }
586 sk = sk->next;
587 }
588 }
589}
590
591void tcp_sltimer_handler(unsigned long data)
592{
593 struct tcp_sl_timer *slt = tcp_slt_array;
594 unsigned long next = ~0UL;
595 unsigned long now = jiffies;
596 int i;
597
598 for (i=0; i < TCP_SLT_MAX; i++, slt++) {
599 if (atomic_read(&slt->count)) {
600 long trigger;
601
602 trigger = slt->period - ((long)(now - slt->last));
603
604 if (trigger <= 0) {
605 (*slt->handler)((unsigned long) slt);
606 slt->last = now;
607 trigger = slt->period;
608 }
609
610
611 if (atomic_read(&slt->count))
612 next = min(next, trigger);
613 }
614 }
615 if (next != ~0UL)
616 mod_timer(&tcp_slow_timer, (now + next));
617}
618
619void __tcp_inc_slow_timer(struct tcp_sl_timer *slt)
620{
621 unsigned long now = jiffies;
622 unsigned long when;
623
624 slt->last = now;
625
626 when = now + slt->period;
627
628 if (tcp_slow_timer.prev) {
629 if ((long)(tcp_slow_timer.expires - when) >= 0)
630 mod_timer(&tcp_slow_timer, when);
631 } else {
632 tcp_slow_timer.expires = when;
633 add_timer(&tcp_slow_timer);
634 }
635}
636