1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250#include <linux/config.h>
251#include <linux/module.h>
252#include <linux/types.h>
253#include <linux/fcntl.h>
254#include <linux/poll.h>
255#include <linux/init.h>
256#include <linux/smp_lock.h>
257#include <linux/fs.h>
258#include <linux/random.h>
259
260#include <net/icmp.h>
261#include <net/tcp.h>
262#include <net/xfrm.h>
263#include <net/ip.h>
264
265
266#include <asm/uaccess.h>
267#include <asm/ioctls.h>
268
269int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
270
271DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics);
272
273kmem_cache_t *tcp_openreq_cachep;
274kmem_cache_t *tcp_bucket_cachep;
275kmem_cache_t *tcp_timewait_cachep;
276
277atomic_t tcp_orphan_count = ATOMIC_INIT(0);
278
279int sysctl_tcp_mem[3];
280int sysctl_tcp_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
281int sysctl_tcp_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
282
283atomic_t tcp_memory_allocated;
284atomic_t tcp_sockets_allocated;
285
286
287
288
289
290int tcp_memory_pressure;
291
292#define TCP_PAGES(amt) (((amt) + TCP_MEM_QUANTUM - 1) / TCP_MEM_QUANTUM)
293
294int tcp_mem_schedule(struct sock *sk, int size, int kind)
295{
296 int amt = TCP_PAGES(size);
297
298 sk->sk_forward_alloc += amt * TCP_MEM_QUANTUM;
299 atomic_add(amt, &tcp_memory_allocated);
300
301
302 if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
303 if (tcp_memory_pressure)
304 tcp_memory_pressure = 0;
305 return 1;
306 }
307
308
309 if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) {
310 tcp_enter_memory_pressure();
311 goto suppress_allocation;
312 }
313
314
315 if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1])
316 tcp_enter_memory_pressure();
317
318 if (kind) {
319 if (atomic_read(&sk->sk_rmem_alloc) < sysctl_tcp_rmem[0])
320 return 1;
321 } else if (sk->sk_wmem_queued < sysctl_tcp_wmem[0])
322 return 1;
323
324 if (!tcp_memory_pressure ||
325 sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated) *
326 TCP_PAGES(sk->sk_wmem_queued +
327 atomic_read(&sk->sk_rmem_alloc) +
328 sk->sk_forward_alloc))
329 return 1;
330
331suppress_allocation:
332
333 if (!kind) {
334 tcp_moderate_sndbuf(sk);
335
336
337
338
339 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
340 return 1;
341 }
342
343
344 sk->sk_forward_alloc -= amt * TCP_MEM_QUANTUM;
345 atomic_sub(amt, &tcp_memory_allocated);
346 return 0;
347}
348
349void __tcp_mem_reclaim(struct sock *sk)
350{
351 if (sk->sk_forward_alloc >= TCP_MEM_QUANTUM) {
352 atomic_sub(sk->sk_forward_alloc / TCP_MEM_QUANTUM,
353 &tcp_memory_allocated);
354 sk->sk_forward_alloc &= TCP_MEM_QUANTUM - 1;
355 if (tcp_memory_pressure &&
356 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
357 tcp_memory_pressure = 0;
358 }
359}
360
361void tcp_rfree(struct sk_buff *skb)
362{
363 struct sock *sk = skb->sk;
364
365 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
366 sk->sk_forward_alloc += skb->truesize;
367}
368
369
370
371
372static __inline__ unsigned int tcp_listen_poll(struct sock *sk,
373 poll_table *wait)
374{
375 return tcp_sk(sk)->accept_queue ? (POLLIN | POLLRDNORM) : 0;
376}
377
378
379
380
381
382
383
384
385unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
386{
387 unsigned int mask;
388 struct sock *sk = sock->sk;
389 struct tcp_opt *tp = tcp_sk(sk);
390
391 poll_wait(file, sk->sk_sleep, wait);
392 if (sk->sk_state == TCP_LISTEN)
393 return tcp_listen_poll(sk, wait);
394
395
396
397
398
399
400 mask = 0;
401 if (sk->sk_err)
402 mask = POLLERR;
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
432 mask |= POLLHUP;
433 if (sk->sk_shutdown & RCV_SHUTDOWN)
434 mask |= POLLIN | POLLRDNORM;
435
436
437 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
438
439
440
441 if ((tp->rcv_nxt != tp->copied_seq) &&
442 (tp->urg_seq != tp->copied_seq ||
443 tp->rcv_nxt != tp->copied_seq + 1 ||
444 sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data))
445 mask |= POLLIN | POLLRDNORM;
446
447 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
448 if (tcp_wspace(sk) >= tcp_min_write_space(sk)) {
449 mask |= POLLOUT | POLLWRNORM;
450 } else {
451 set_bit(SOCK_ASYNC_NOSPACE,
452 &sk->sk_socket->flags);
453 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
454
455
456
457
458
459 if (tcp_wspace(sk) >= tcp_min_write_space(sk))
460 mask |= POLLOUT | POLLWRNORM;
461 }
462 }
463
464 if (tp->urg_data & TCP_URG_VALID)
465 mask |= POLLPRI;
466 }
467 return mask;
468}
469
470
471
472
473void tcp_write_space(struct sock *sk)
474{
475 struct socket *sock = sk->sk_socket;
476
477 if (tcp_wspace(sk) >= tcp_min_write_space(sk) && sock) {
478 clear_bit(SOCK_NOSPACE, &sock->flags);
479
480 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
481 wake_up_interruptible(sk->sk_sleep);
482
483 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
484 sock_wake_async(sock, 2, POLL_OUT);
485 }
486}
487
488int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
489{
490 struct tcp_opt *tp = tcp_sk(sk);
491 int answ;
492
493 switch (cmd) {
494 case SIOCINQ:
495 if (sk->sk_state == TCP_LISTEN)
496 return -EINVAL;
497
498 lock_sock(sk);
499 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
500 answ = 0;
501 else if (sock_flag(sk, SOCK_URGINLINE) ||
502 !tp->urg_data ||
503 before(tp->urg_seq, tp->copied_seq) ||
504 !before(tp->urg_seq, tp->rcv_nxt)) {
505 answ = tp->rcv_nxt - tp->copied_seq;
506
507
508 if (answ && !skb_queue_empty(&sk->sk_receive_queue))
509 answ -=
510 ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
511 } else
512 answ = tp->urg_seq - tp->copied_seq;
513 release_sock(sk);
514 break;
515 case SIOCATMARK:
516 answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
517 break;
518 case SIOCOUTQ:
519 if (sk->sk_state == TCP_LISTEN)
520 return -EINVAL;
521
522 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
523 answ = 0;
524 else
525 answ = tp->write_seq - tp->snd_una;
526 break;
527 default:
528 return -ENOIOCTLCMD;
529 };
530
531 return put_user(answ, (int *)arg);
532}
533
534
535int tcp_listen_start(struct sock *sk)
536{
537 struct inet_opt *inet = inet_sk(sk);
538 struct tcp_opt *tp = tcp_sk(sk);
539 struct tcp_listen_opt *lopt;
540
541 sk->sk_max_ack_backlog = 0;
542 sk->sk_ack_backlog = 0;
543 tp->accept_queue = tp->accept_queue_tail = NULL;
544 tp->syn_wait_lock = RW_LOCK_UNLOCKED;
545 tcp_delack_init(tp);
546
547 lopt = kmalloc(sizeof(struct tcp_listen_opt), GFP_KERNEL);
548 if (!lopt)
549 return -ENOMEM;
550
551 memset(lopt, 0, sizeof(struct tcp_listen_opt));
552 for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++)
553 if ((1 << lopt->max_qlen_log) >= sysctl_max_syn_backlog)
554 break;
555 get_random_bytes(&lopt->hash_rnd, 4);
556
557 write_lock_bh(&tp->syn_wait_lock);
558 tp->listen_opt = lopt;
559 write_unlock_bh(&tp->syn_wait_lock);
560
561
562
563
564
565
566 sk->sk_state = TCP_LISTEN;
567 if (!sk->sk_prot->get_port(sk, inet->num)) {
568 inet->sport = htons(inet->num);
569
570 sk_dst_reset(sk);
571 sk->sk_prot->hash(sk);
572
573 return 0;
574 }
575
576 sk->sk_state = TCP_CLOSE;
577 write_lock_bh(&tp->syn_wait_lock);
578 tp->listen_opt = NULL;
579 write_unlock_bh(&tp->syn_wait_lock);
580 kfree(lopt);
581 return -EADDRINUSE;
582}
583
584
585
586
587
588
589static void tcp_listen_stop (struct sock *sk)
590{
591 struct tcp_opt *tp = tcp_sk(sk);
592 struct tcp_listen_opt *lopt = tp->listen_opt;
593 struct open_request *acc_req = tp->accept_queue;
594 struct open_request *req;
595 int i;
596
597 tcp_delete_keepalive_timer(sk);
598
599
600 write_lock_bh(&tp->syn_wait_lock);
601 tp->listen_opt = NULL;
602 write_unlock_bh(&tp->syn_wait_lock);
603 tp->accept_queue = tp->accept_queue_tail = NULL;
604
605 if (lopt->qlen) {
606 for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
607 while ((req = lopt->syn_table[i]) != NULL) {
608 lopt->syn_table[i] = req->dl_next;
609 lopt->qlen--;
610 tcp_openreq_free(req);
611
612
613
614
615
616
617
618
619
620 }
621 }
622 }
623 BUG_TRAP(!lopt->qlen);
624
625 kfree(lopt);
626
627 while ((req = acc_req) != NULL) {
628 struct sock *child = req->sk;
629
630 acc_req = req->dl_next;
631
632 local_bh_disable();
633 bh_lock_sock(child);
634 BUG_TRAP(!sock_owned_by_user(child));
635 sock_hold(child);
636
637 tcp_disconnect(child, O_NONBLOCK);
638
639 sock_orphan(child);
640
641 atomic_inc(&tcp_orphan_count);
642
643 tcp_destroy_sock(child);
644
645 bh_unlock_sock(child);
646 local_bh_enable();
647 sock_put(child);
648
649 tcp_acceptq_removed(sk);
650 tcp_openreq_fastfree(req);
651 }
652 BUG_TRAP(!sk->sk_ack_backlog);
653}
654
655
656
657
658
659
660static int wait_for_tcp_connect(struct sock *sk, int flags, long *timeo_p)
661{
662 struct tcp_opt *tp = tcp_sk(sk);
663 struct task_struct *tsk = current;
664 DEFINE_WAIT(wait);
665
666 while ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
667 if (sk->sk_err)
668 return sock_error(sk);
669 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
670 return -EPIPE;
671 if (!*timeo_p)
672 return -EAGAIN;
673 if (signal_pending(tsk))
674 return sock_intr_errno(*timeo_p);
675
676 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
677 tp->write_pending++;
678
679 release_sock(sk);
680 *timeo_p = schedule_timeout(*timeo_p);
681 lock_sock(sk);
682
683 finish_wait(sk->sk_sleep, &wait);
684 tp->write_pending--;
685 }
686 return 0;
687}
688
689static inline int tcp_memory_free(struct sock *sk)
690{
691 return sk->sk_wmem_queued < sk->sk_sndbuf;
692}
693
694
695
696
697static int wait_for_tcp_memory(struct sock *sk, long *timeo)
698{
699 struct tcp_opt *tp = tcp_sk(sk);
700 int err = 0;
701 long vm_wait = 0;
702 long current_timeo = *timeo;
703 DEFINE_WAIT(wait);
704
705 if (tcp_memory_free(sk))
706 current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
707
708 for (;;) {
709 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
710
711 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
712
713 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
714 goto do_error;
715 if (!*timeo)
716 goto do_nonblock;
717 if (signal_pending(current))
718 goto do_interrupted;
719 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
720 if (tcp_memory_free(sk) && !vm_wait)
721 break;
722
723 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
724 tp->write_pending++;
725 release_sock(sk);
726 if (!tcp_memory_free(sk) || vm_wait)
727 current_timeo = schedule_timeout(current_timeo);
728 lock_sock(sk);
729 tp->write_pending--;
730
731 if (vm_wait) {
732 vm_wait -= current_timeo;
733 current_timeo = *timeo;
734 if (current_timeo != MAX_SCHEDULE_TIMEOUT &&
735 (current_timeo -= vm_wait) < 0)
736 current_timeo = 0;
737 vm_wait = 0;
738 }
739 *timeo = current_timeo;
740 }
741out:
742 finish_wait(sk->sk_sleep, &wait);
743 return err;
744
745do_error:
746 err = -EPIPE;
747 goto out;
748do_nonblock:
749 err = -EAGAIN;
750 goto out;
751do_interrupted:
752 err = sock_intr_errno(*timeo);
753 goto out;
754}
755
756ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
757 size_t psize, int flags);
758
759static inline int can_coalesce(struct sk_buff *skb, int i, struct page *page,
760 int off)
761{
762 if (i) {
763 skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
764 return page == frag->page &&
765 off == frag->page_offset + frag->size;
766 }
767 return 0;
768}
769
770static inline void fill_page_desc(struct sk_buff *skb, int i,
771 struct page *page, int off, int size)
772{
773 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
774 frag->page = page;
775 frag->page_offset = off;
776 frag->size = size;
777 skb_shinfo(skb)->nr_frags = i + 1;
778}
779
780static inline void tcp_mark_push(struct tcp_opt *tp, struct sk_buff *skb)
781{
782 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
783 tp->pushed_seq = tp->write_seq;
784}
785
786static inline int forced_push(struct tcp_opt *tp)
787{
788 return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
789}
790
791static inline void skb_entail(struct sock *sk, struct tcp_opt *tp,
792 struct sk_buff *skb)
793{
794 skb->csum = 0;
795 TCP_SKB_CB(skb)->seq = tp->write_seq;
796 TCP_SKB_CB(skb)->end_seq = tp->write_seq;
797 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
798 TCP_SKB_CB(skb)->sacked = 0;
799 __skb_queue_tail(&sk->sk_write_queue, skb);
800 tcp_charge_skb(sk, skb);
801 if (!tp->send_head)
802 tp->send_head = skb;
803 else if (tp->nonagle&TCP_NAGLE_PUSH)
804 tp->nonagle &= ~TCP_NAGLE_PUSH;
805}
806
807static inline void tcp_mark_urg(struct tcp_opt *tp, int flags,
808 struct sk_buff *skb)
809{
810 if (flags & MSG_OOB) {
811 tp->urg_mode = 1;
812 tp->snd_up = tp->write_seq;
813 TCP_SKB_CB(skb)->sacked |= TCPCB_URG;
814 }
815}
816
817static inline void tcp_push(struct sock *sk, struct tcp_opt *tp, int flags,
818 int mss_now, int nonagle)
819{
820 if (tp->send_head) {
821 struct sk_buff *skb = sk->sk_write_queue.prev;
822 if (!(flags & MSG_MORE) || forced_push(tp))
823 tcp_mark_push(tp, skb);
824 tcp_mark_urg(tp, flags, skb);
825 __tcp_push_pending_frames(sk, tp, mss_now,
826 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
827 }
828}
829
830static int tcp_error(struct sock *sk, int flags, int err)
831{
832 if (err == -EPIPE)
833 err = sock_error(sk) ? : -EPIPE;
834 if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
835 send_sig(SIGPIPE, current, 0);
836 return err;
837}
838
839ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
840 size_t psize, int flags)
841{
842 struct tcp_opt *tp = tcp_sk(sk);
843 int mss_now;
844 int err;
845 ssize_t copied;
846 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
847
848
849 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
850 if ((err = wait_for_tcp_connect(sk, 0, &timeo)) != 0)
851 goto out_err;
852
853 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
854
855 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
856 copied = 0;
857
858 err = -EPIPE;
859 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
860 goto do_error;
861
862 while (psize > 0) {
863 struct sk_buff *skb = sk->sk_write_queue.prev;
864 struct page *page = pages[poffset / PAGE_SIZE];
865 int copy, i;
866 int offset = poffset % PAGE_SIZE;
867 int size = min_t(size_t, psize, PAGE_SIZE - offset);
868
869 if (!tp->send_head || (copy = mss_now - skb->len) <= 0) {
870new_segment:
871 if (!tcp_memory_free(sk))
872 goto wait_for_sndbuf;
873
874 skb = tcp_alloc_pskb(sk, 0, tp->mss_cache,
875 sk->sk_allocation);
876 if (!skb)
877 goto wait_for_memory;
878
879 skb_entail(sk, tp, skb);
880 copy = mss_now;
881 }
882
883 if (copy > size)
884 copy = size;
885
886 i = skb_shinfo(skb)->nr_frags;
887 if (can_coalesce(skb, i, page, offset)) {
888 skb_shinfo(skb)->frags[i - 1].size += copy;
889 } else if (i < MAX_SKB_FRAGS) {
890 get_page(page);
891 fill_page_desc(skb, i, page, offset, copy);
892 } else {
893 tcp_mark_push(tp, skb);
894 goto new_segment;
895 }
896
897 skb->len += copy;
898 skb->data_len += copy;
899 skb->ip_summed = CHECKSUM_HW;
900 tp->write_seq += copy;
901 TCP_SKB_CB(skb)->end_seq += copy;
902
903 if (!copied)
904 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
905
906 copied += copy;
907 poffset += copy;
908 if (!(psize -= copy))
909 goto out;
910
911 if (skb->len != mss_now || (flags & MSG_OOB))
912 continue;
913
914 if (forced_push(tp)) {
915 tcp_mark_push(tp, skb);
916 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
917 } else if (skb == tp->send_head)
918 tcp_push_one(sk, mss_now);
919 continue;
920
921wait_for_sndbuf:
922 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
923wait_for_memory:
924 if (copied)
925 tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
926
927 if ((err = wait_for_tcp_memory(sk, &timeo)) != 0)
928 goto do_error;
929
930 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
931 }
932
933out:
934 if (copied)
935 tcp_push(sk, tp, flags, mss_now, tp->nonagle);
936 return copied;
937
938do_error:
939 if (copied)
940 goto out;
941out_err:
942 return tcp_error(sk, flags, err);
943}
944
945ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
946 size_t size, int flags)
947{
948 ssize_t res;
949 struct sock *sk = sock->sk;
950
951#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
952
953 if (!(sk->sk_route_caps & NETIF_F_SG) ||
954 !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
955 return sock_no_sendpage(sock, page, offset, size, flags);
956
957#undef TCP_ZC_CSUM_FLAGS
958
959 lock_sock(sk);
960 TCP_CHECK_TIMER(sk);
961 res = do_tcp_sendpages(sk, &page, offset, size, flags);
962 TCP_CHECK_TIMER(sk);
963 release_sock(sk);
964 return res;
965}
966
/* Per-socket partially filled page used by tcp_sendmsg(), and its fill offset. */
#define TCP_PAGE(sk)	(inet_sk(sk)->sndmsg_page)
#define TCP_OFF(sk)	(inet_sk(sk)->sndmsg_off)
969
970static inline int tcp_copy_to_page(struct sock *sk, char *from,
971 struct sk_buff *skb, struct page *page,
972 int off, int copy)
973{
974 int err = 0;
975 unsigned int csum;
976
977 if (skb->ip_summed == CHECKSUM_NONE) {
978 csum = csum_and_copy_from_user(from, page_address(page) + off,
979 copy, 0, &err);
980 if (err) return err;
981 skb->csum = csum_block_add(skb->csum, csum, skb->len);
982 } else {
983 if (copy_from_user(page_address(page) + off, from, copy))
984 return -EFAULT;
985 }
986
987 skb->len += copy;
988 skb->data_len += copy;
989 skb->truesize += copy;
990 sk->sk_wmem_queued += copy;
991 sk->sk_forward_alloc -= copy;
992 return 0;
993}
994
995static inline int skb_add_data(struct sk_buff *skb, char *from, int copy)
996{
997 int err = 0;
998 unsigned int csum;
999 int off = skb->len;
1000
1001 if (skb->ip_summed == CHECKSUM_NONE) {
1002 csum = csum_and_copy_from_user(from, skb_put(skb, copy),
1003 copy, 0, &err);
1004 if (!err) {
1005 skb->csum = csum_block_add(skb->csum, csum, off);
1006 return 0;
1007 }
1008 } else {
1009 if (!copy_from_user(skb_put(skb, copy), from, copy))
1010 return 0;
1011 }
1012
1013 __skb_trim(skb, off);
1014 return -EFAULT;
1015}
1016
1017static inline int select_size(struct sock *sk, struct tcp_opt *tp)
1018{
1019 int tmp = tp->mss_cache_std;
1020
1021 if (sk->sk_route_caps & NETIF_F_SG) {
1022 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
1023
1024 if (tmp >= pgbreak &&
1025 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
1026 tmp = pgbreak;
1027 }
1028 return tmp;
1029}
1030
1031int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1032 int size)
1033{
1034 struct iovec *iov;
1035 struct tcp_opt *tp = tcp_sk(sk);
1036 struct sk_buff *skb;
1037 int iovlen, flags;
1038 int mss_now;
1039 int err, copied;
1040 long timeo;
1041
1042 lock_sock(sk);
1043 TCP_CHECK_TIMER(sk);
1044
1045 flags = msg->msg_flags;
1046 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
1047
1048
1049 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
1050 if ((err = wait_for_tcp_connect(sk, flags, &timeo)) != 0)
1051 goto out_err;
1052
1053
1054 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1055
1056 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
1057
1058
1059 iovlen = msg->msg_iovlen;
1060 iov = msg->msg_iov;
1061 copied = 0;
1062
1063 err = -EPIPE;
1064 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1065 goto do_error;
1066
1067 while (--iovlen >= 0) {
1068 int seglen = iov->iov_len;
1069 unsigned char *from = iov->iov_base;
1070
1071 iov++;
1072
1073 while (seglen > 0) {
1074 int copy;
1075
1076 skb = sk->sk_write_queue.prev;
1077
1078 if (!tp->send_head ||
1079 (copy = mss_now - skb->len) <= 0) {
1080
1081new_segment:
1082
1083
1084
1085 if (!tcp_memory_free(sk))
1086 goto wait_for_sndbuf;
1087
1088 skb = tcp_alloc_pskb(sk, select_size(sk, tp),
1089 0, sk->sk_allocation);
1090 if (!skb)
1091 goto wait_for_memory;
1092
1093
1094
1095
1096 if (sk->sk_route_caps &
1097 (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
1098 NETIF_F_HW_CSUM))
1099 skb->ip_summed = CHECKSUM_HW;
1100
1101 skb_entail(sk, tp, skb);
1102 copy = mss_now;
1103 }
1104
1105
1106 if (copy > seglen)
1107 copy = seglen;
1108
1109
1110 if (skb_tailroom(skb) > 0) {
1111
1112 if (copy > skb_tailroom(skb))
1113 copy = skb_tailroom(skb);
1114 if ((err = skb_add_data(skb, from, copy)) != 0)
1115 goto do_fault;
1116 } else {
1117 int merge = 0;
1118 int i = skb_shinfo(skb)->nr_frags;
1119 struct page *page = TCP_PAGE(sk);
1120 int off = TCP_OFF(sk);
1121
1122 if (can_coalesce(skb, i, page, off) &&
1123 off != PAGE_SIZE) {
1124
1125
1126 merge = 1;
1127 } else if (i == MAX_SKB_FRAGS ||
1128 (!i &&
1129 !(sk->sk_route_caps & NETIF_F_SG))) {
1130
1131
1132
1133
1134 tcp_mark_push(tp, skb);
1135 goto new_segment;
1136 } else if (page) {
1137
1138
1139
1140 off = (off + L1_CACHE_BYTES - 1) &
1141 ~(L1_CACHE_BYTES - 1);
1142 if (off == PAGE_SIZE) {
1143 put_page(page);
1144 TCP_PAGE(sk) = page = NULL;
1145 }
1146 }
1147
1148 if (!page) {
1149
1150 if (!(page = tcp_alloc_page(sk)))
1151 goto wait_for_memory;
1152 off = 0;
1153 }
1154
1155 if (copy > PAGE_SIZE - off)
1156 copy = PAGE_SIZE - off;
1157
1158
1159
1160 err = tcp_copy_to_page(sk, from, skb, page,
1161 off, copy);
1162 if (err) {
1163
1164
1165
1166 if (!TCP_PAGE(sk)) {
1167 TCP_PAGE(sk) = page;
1168 TCP_OFF(sk) = 0;
1169 }
1170 goto do_error;
1171 }
1172
1173
1174 if (merge) {
1175 skb_shinfo(skb)->frags[i - 1].size +=
1176 copy;
1177 } else {
1178 fill_page_desc(skb, i, page, off, copy);
1179 if (TCP_PAGE(sk)) {
1180 get_page(page);
1181 } else if (off + copy < PAGE_SIZE) {
1182 get_page(page);
1183 TCP_PAGE(sk) = page;
1184 }
1185 }
1186
1187 TCP_OFF(sk) = off + copy;
1188 }
1189
1190 if (!copied)
1191 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
1192
1193 tp->write_seq += copy;
1194 TCP_SKB_CB(skb)->end_seq += copy;
1195
1196 from += copy;
1197 copied += copy;
1198 if ((seglen -= copy) == 0 && iovlen == 0)
1199 goto out;
1200
1201 if (skb->len != mss_now || (flags & MSG_OOB))
1202 continue;
1203
1204 if (forced_push(tp)) {
1205 tcp_mark_push(tp, skb);
1206 __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
1207 } else if (skb == tp->send_head)
1208 tcp_push_one(sk, mss_now);
1209 continue;
1210
1211wait_for_sndbuf:
1212 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1213wait_for_memory:
1214 if (copied)
1215 tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1216
1217 if ((err = wait_for_tcp_memory(sk, &timeo)) != 0)
1218 goto do_error;
1219
1220 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
1221 }
1222 }
1223
1224out:
1225 if (copied)
1226 tcp_push(sk, tp, flags, mss_now, tp->nonagle);
1227 TCP_CHECK_TIMER(sk);
1228 release_sock(sk);
1229 return copied;
1230
1231do_fault:
1232 if (!skb->len) {
1233 if (tp->send_head == skb)
1234 tp->send_head = NULL;
1235 __skb_unlink(skb, skb->list);
1236 tcp_free_skb(sk, skb);
1237 }
1238
1239do_error:
1240 if (copied)
1241 goto out;
1242out_err:
1243 err = tcp_error(sk, flags, err);
1244 TCP_CHECK_TIMER(sk);
1245 release_sock(sk);
1246 return err;
1247}
1248
1249
1250
1251
1252
1253
1254static int tcp_recv_urg(struct sock *sk, long timeo,
1255 struct msghdr *msg, int len, int flags,
1256 int *addr_len)
1257{
1258 struct tcp_opt *tp = tcp_sk(sk);
1259
1260
1261 if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
1262 tp->urg_data == TCP_URG_READ)
1263 return -EINVAL;
1264
1265 if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
1266 return -ENOTCONN;
1267
1268 if (tp->urg_data & TCP_URG_VALID) {
1269 int err = 0;
1270 char c = tp->urg_data;
1271
1272 if (!(flags & MSG_PEEK))
1273 tp->urg_data = TCP_URG_READ;
1274
1275
1276 msg->msg_flags |= MSG_OOB;
1277
1278 if (len > 0) {
1279 if (!(flags & MSG_TRUNC))
1280 err = memcpy_toiovec(msg->msg_iov, &c, 1);
1281 len = 1;
1282 } else
1283 msg->msg_flags |= MSG_TRUNC;
1284
1285 return err ? -EFAULT : len;
1286 }
1287
1288 if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
1289 return 0;
1290
1291
1292
1293
1294
1295
1296
1297 return -EAGAIN;
1298}
1299
1300
1301
1302
1303
1304
1305
1306static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
1307{
1308 __skb_unlink(skb, &sk->sk_receive_queue);
1309 __kfree_skb(skb);
1310}
1311
1312
1313
1314
1315
1316
1317
1318static void cleanup_rbuf(struct sock *sk, int copied)
1319{
1320 struct tcp_opt *tp = tcp_sk(sk);
1321 int time_to_ack = 0;
1322
1323#if TCP_DEBUG
1324 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1325
1326 BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
1327#endif
1328
1329 if (tcp_ack_scheduled(tp)) {
1330
1331
1332 if (tp->ack.blocked ||
1333
1334 tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss ||
1335
1336
1337
1338
1339
1340
1341 (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) &&
1342 !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc)))
1343 time_to_ack = 1;
1344 }
1345
1346
1347
1348
1349
1350
1351
1352 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1353 __u32 rcv_window_now = tcp_receive_window(tp);
1354
1355
1356 if (2*rcv_window_now <= tp->window_clamp) {
1357 __u32 new_window = __tcp_select_window(sk);
1358
1359
1360
1361
1362
1363
1364 if (new_window && new_window >= 2 * rcv_window_now)
1365 time_to_ack = 1;
1366 }
1367 }
1368 if (time_to_ack)
1369 tcp_send_ack(sk);
1370}
1371
1372
1373
1374
1375
1376
1377
1378static long tcp_data_wait(struct sock *sk, long timeo)
1379{
1380 DEFINE_WAIT(wait);
1381
1382 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1383
1384 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1385 release_sock(sk);
1386
1387 if (skb_queue_empty(&sk->sk_receive_queue))
1388 timeo = schedule_timeout(timeo);
1389
1390 lock_sock(sk);
1391 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1392
1393 finish_wait(sk->sk_sleep, &wait);
1394 return timeo;
1395}
1396
1397static void tcp_prequeue_process(struct sock *sk)
1398{
1399 struct sk_buff *skb;
1400 struct tcp_opt *tp = tcp_sk(sk);
1401
1402 NET_ADD_STATS_USER(TCPPrequeued, skb_queue_len(&tp->ucopy.prequeue));
1403
1404
1405
1406 local_bh_disable();
1407 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1408 sk->sk_backlog_rcv(sk, skb);
1409 local_bh_enable();
1410
1411
1412 tp->ucopy.memory = 0;
1413}
1414
1415static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1416{
1417 struct sk_buff *skb;
1418 u32 offset;
1419
1420 skb_queue_walk(&sk->sk_receive_queue, skb) {
1421 offset = seq - TCP_SKB_CB(skb)->seq;
1422 if (skb->h.th->syn)
1423 offset--;
1424 if (offset < skb->len || skb->h.th->fin) {
1425 *off = offset;
1426 return skb;
1427 }
1428 }
1429 return NULL;
1430}
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1444 sk_read_actor_t recv_actor)
1445{
1446 struct sk_buff *skb;
1447 struct tcp_opt *tp = tcp_sk(sk);
1448 u32 seq = tp->copied_seq;
1449 u32 offset;
1450 int copied = 0;
1451
1452 if (sk->sk_state == TCP_LISTEN)
1453 return -ENOTCONN;
1454 while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
1455 if (offset < skb->len) {
1456 size_t used, len;
1457
1458 len = skb->len - offset;
1459
1460 if (tp->urg_data) {
1461 u32 urg_offset = tp->urg_seq - seq;
1462 if (urg_offset < len)
1463 len = urg_offset;
1464 if (!len)
1465 break;
1466 }
1467 used = recv_actor(desc, skb, offset, len);
1468 if (used <= len) {
1469 seq += used;
1470 copied += used;
1471 offset += used;
1472 }
1473 if (offset != skb->len)
1474 break;
1475 }
1476 if (skb->h.th->fin) {
1477 tcp_eat_skb(sk, skb);
1478 ++seq;
1479 break;
1480 }
1481 tcp_eat_skb(sk, skb);
1482 if (!desc->count)
1483 break;
1484 }
1485 tp->copied_seq = seq;
1486
1487 if (copied)
1488 cleanup_rbuf(sk, copied);
1489 return copied;
1490}
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1501 int len, int nonblock, int flags, int *addr_len)
1502{
1503 struct tcp_opt *tp = tcp_sk(sk);
1504 int copied = 0;
1505 u32 peek_seq;
1506 u32 *seq;
1507 unsigned long used;
1508 int err;
1509 int target;
1510 long timeo;
1511 struct task_struct *user_recv = NULL;
1512
1513 lock_sock(sk);
1514
1515 TCP_CHECK_TIMER(sk);
1516
1517 err = -ENOTCONN;
1518 if (sk->sk_state == TCP_LISTEN)
1519 goto out;
1520
1521 timeo = sock_rcvtimeo(sk, nonblock);
1522
1523
1524 if (flags & MSG_OOB)
1525 goto recv_urg;
1526
1527 seq = &tp->copied_seq;
1528 if (flags & MSG_PEEK) {
1529 peek_seq = tp->copied_seq;
1530 seq = &peek_seq;
1531 }
1532
1533 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1534
1535 do {
1536 struct sk_buff *skb;
1537 u32 offset;
1538
1539
1540 if (copied && tp->urg_data && tp->urg_seq == *seq)
1541 break;
1542
1543
1544
1545
1546
1547 if (signal_pending(current)) {
1548 if (copied)
1549 break;
1550 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
1551 break;
1552 }
1553
1554
1555
1556 skb = skb_peek(&sk->sk_receive_queue);
1557 do {
1558 if (!skb)
1559 break;
1560
1561
1562
1563
1564 if (before(*seq, TCP_SKB_CB(skb)->seq)) {
1565 printk(KERN_INFO "recvmsg bug: copied %X "
1566 "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
1567 break;
1568 }
1569 offset = *seq - TCP_SKB_CB(skb)->seq;
1570 if (skb->h.th->syn)
1571 offset--;
1572 if (offset < skb->len)
1573 goto found_ok_skb;
1574 if (skb->h.th->fin)
1575 goto found_fin_ok;
1576 BUG_TRAP(flags & MSG_PEEK);
1577 skb = skb->next;
1578 } while (skb != (struct sk_buff *)&sk->sk_receive_queue);
1579
1580
1581
1582 if (copied >= target && !sk->sk_backlog.tail)
1583 break;
1584
1585 if (copied) {
1586 if (sk->sk_err ||
1587 sk->sk_state == TCP_CLOSE ||
1588 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1589 !timeo ||
1590 (flags & MSG_PEEK))
1591 break;
1592 } else {
1593 if (sock_flag(sk, SOCK_DONE))
1594 break;
1595
1596 if (sk->sk_err) {
1597 copied = sock_error(sk);
1598 break;
1599 }
1600
1601 if (sk->sk_shutdown & RCV_SHUTDOWN)
1602 break;
1603
1604 if (sk->sk_state == TCP_CLOSE) {
1605 if (!sock_flag(sk, SOCK_DONE)) {
1606
1607
1608
1609 copied = -ENOTCONN;
1610 break;
1611 }
1612 break;
1613 }
1614
1615 if (!timeo) {
1616 copied = -EAGAIN;
1617 break;
1618 }
1619 }
1620
1621 cleanup_rbuf(sk, copied);
1622
1623 if (tp->ucopy.task == user_recv) {
1624
1625 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1626 user_recv = current;
1627 tp->ucopy.task = user_recv;
1628 tp->ucopy.iov = msg->msg_iov;
1629 }
1630
1631 tp->ucopy.len = len;
1632
1633 BUG_TRAP(tp->copied_seq == tp->rcv_nxt ||
1634 (flags & (MSG_PEEK | MSG_TRUNC)));
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662 if (skb_queue_len(&tp->ucopy.prequeue))
1663 goto do_prequeue;
1664
1665
1666 }
1667
1668 if (copied >= target) {
1669
1670 release_sock(sk);
1671 lock_sock(sk);
1672 } else {
1673 timeo = tcp_data_wait(sk, timeo);
1674 }
1675
1676 if (user_recv) {
1677 int chunk;
1678
1679
1680
1681 if ((chunk = len - tp->ucopy.len) != 0) {
1682 NET_ADD_STATS_USER(TCPDirectCopyFromBacklog, chunk);
1683 len -= chunk;
1684 copied += chunk;
1685 }
1686
1687 if (tp->rcv_nxt == tp->copied_seq &&
1688 skb_queue_len(&tp->ucopy.prequeue)) {
1689do_prequeue:
1690 tcp_prequeue_process(sk);
1691
1692 if ((chunk = len - tp->ucopy.len) != 0) {
1693 NET_ADD_STATS_USER(TCPDirectCopyFromPrequeue, chunk);
1694 len -= chunk;
1695 copied += chunk;
1696 }
1697 }
1698 }
1699 if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
1700 if (net_ratelimit())
1701 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1702 current->comm, current->pid);
1703 peek_seq = tp->copied_seq;
1704 }
1705 continue;
1706
1707 found_ok_skb:
1708
1709 used = skb->len - offset;
1710 if (len < used)
1711 used = len;
1712
1713
1714 if (tp->urg_data) {
1715 u32 urg_offset = tp->urg_seq - *seq;
1716 if (urg_offset < used) {
1717 if (!urg_offset) {
1718 if (!sock_flag(sk, SOCK_URGINLINE)) {
1719 ++*seq;
1720 offset++;
1721 used--;
1722 if (!used)
1723 goto skip_copy;
1724 }
1725 } else
1726 used = urg_offset;
1727 }
1728 }
1729
1730 if (!(flags & MSG_TRUNC)) {
1731 err = skb_copy_datagram_iovec(skb, offset,
1732 msg->msg_iov, used);
1733 if (err) {
1734
1735 if (!copied)
1736 copied = -EFAULT;
1737 break;
1738 }
1739 }
1740
1741 *seq += used;
1742 copied += used;
1743 len -= used;
1744
1745skip_copy:
1746 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
1747 tp->urg_data = 0;
1748 tcp_fast_path_check(sk, tp);
1749 }
1750 if (used + offset < skb->len)
1751 continue;
1752
1753 if (skb->h.th->fin)
1754 goto found_fin_ok;
1755 if (!(flags & MSG_PEEK))
1756 tcp_eat_skb(sk, skb);
1757 continue;
1758
1759 found_fin_ok:
1760
1761 ++*seq;
1762 if (!(flags & MSG_PEEK))
1763 tcp_eat_skb(sk, skb);
1764 break;
1765 } while (len > 0);
1766
1767 if (user_recv) {
1768 if (skb_queue_len(&tp->ucopy.prequeue)) {
1769 int chunk;
1770
1771 tp->ucopy.len = copied > 0 ? len : 0;
1772
1773 tcp_prequeue_process(sk);
1774
1775 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1776 NET_ADD_STATS_USER(TCPDirectCopyFromPrequeue, chunk);
1777 len -= chunk;
1778 copied += chunk;
1779 }
1780 }
1781
1782 tp->ucopy.task = NULL;
1783 tp->ucopy.len = 0;
1784 }
1785
1786
1787
1788
1789
1790
1791 cleanup_rbuf(sk, copied);
1792
1793 TCP_CHECK_TIMER(sk);
1794 release_sock(sk);
1795 return copied;
1796
1797out:
1798 TCP_CHECK_TIMER(sk);
1799 release_sock(sk);
1800 return err;
1801
1802recv_urg:
1803 err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
1804 goto out;
1805}
1806
1807
1808
1809
1810
1811
1812
1813
1814static unsigned char new_state[16] = {
1815
1816 TCP_CLOSE,
1817 TCP_FIN_WAIT1 | TCP_ACTION_FIN,
1818 TCP_CLOSE,
1819 TCP_FIN_WAIT1 | TCP_ACTION_FIN,
1820 TCP_FIN_WAIT1,
1821 TCP_FIN_WAIT2,
1822 TCP_CLOSE,
1823 TCP_CLOSE,
1824 TCP_LAST_ACK | TCP_ACTION_FIN,
1825 TCP_LAST_ACK,
1826 TCP_CLOSE,
1827 TCP_CLOSING,
1828};
1829
1830static int tcp_close_state(struct sock *sk)
1831{
1832 int next = (int)new_state[sk->sk_state];
1833 int ns = next & TCP_STATE_MASK;
1834
1835 tcp_set_state(sk, ns);
1836
1837 return next & TCP_ACTION_FIN;
1838}
1839
1840
1841
1842
1843
1844
1845void tcp_shutdown(struct sock *sk, int how)
1846{
1847
1848
1849
1850
1851 if (!(how & SEND_SHUTDOWN))
1852 return;
1853
1854
1855 if ((1 << sk->sk_state) &
1856 (TCPF_ESTABLISHED | TCPF_SYN_SENT |
1857 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
1858
1859 if (tcp_close_state(sk))
1860 tcp_send_fin(sk);
1861 }
1862}
1863
1864
1865
1866
1867
1868
1869static inline int closing(struct sock *sk)
1870{
1871 return (1 << sk->sk_state) &
1872 (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
1873}
1874
1875static __inline__ void tcp_kill_sk_queues(struct sock *sk)
1876{
1877
1878 __skb_queue_purge(&sk->sk_receive_queue);
1879
1880
1881 __skb_queue_purge(&sk->sk_error_queue);
1882
1883
1884 BUG_TRAP(skb_queue_empty(&sk->sk_write_queue));
1885
1886
1887 tcp_mem_reclaim(sk);
1888
1889 BUG_TRAP(!sk->sk_wmem_queued);
1890 BUG_TRAP(!sk->sk_forward_alloc);
1891
1892
1893
1894
1895
1896}
1897
1898
1899
1900
1901
1902
1903
1904void tcp_destroy_sock(struct sock *sk)
1905{
1906 BUG_TRAP(sk->sk_state == TCP_CLOSE);
1907 BUG_TRAP(sock_flag(sk, SOCK_DEAD));
1908
1909
1910 BUG_TRAP(sk_unhashed(sk));
1911
1912
1913 BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash);
1914
1915#ifdef TCP_DEBUG
1916 if (sk->sk_zapped) {
1917 printk(KERN_DEBUG "TCP: double destroy sk=%p\n", sk);
1918 sock_hold(sk);
1919 }
1920 sk->sk_zapped = 1;
1921#endif
1922
1923 sk->sk_prot->destroy(sk);
1924
1925 tcp_kill_sk_queues(sk);
1926
1927 xfrm_sk_free_policy(sk);
1928
1929#ifdef INET_REFCNT_DEBUG
1930 if (atomic_read(&sk->sk_refcnt) != 1) {
1931 printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
1932 sk, atomic_read(&sk->sk_refcnt));
1933 }
1934#endif
1935
1936 atomic_dec(&tcp_orphan_count);
1937 sock_put(sk);
1938}
1939
1940void tcp_close(struct sock *sk, long timeout)
1941{
1942 struct sk_buff *skb;
1943 int data_was_unread = 0;
1944
1945 lock_sock(sk);
1946 sk->sk_shutdown = SHUTDOWN_MASK;
1947
1948 if (sk->sk_state == TCP_LISTEN) {
1949 tcp_set_state(sk, TCP_CLOSE);
1950
1951
1952 tcp_listen_stop(sk);
1953
1954 goto adjudge_to_death;
1955 }
1956
1957
1958
1959
1960
1961 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1962 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
1963 skb->h.th->fin;
1964 data_was_unread += len;
1965 __kfree_skb(skb);
1966 }
1967
1968 tcp_mem_reclaim(sk);
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979 if (data_was_unread) {
1980
1981 NET_INC_STATS_USER(TCPAbortOnClose);
1982 tcp_set_state(sk, TCP_CLOSE);
1983 tcp_send_active_reset(sk, GFP_KERNEL);
1984 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1985
1986 sk->sk_prot->disconnect(sk, 0);
1987 NET_INC_STATS_USER(TCPAbortOnData);
1988 } else if (tcp_close_state(sk)) {
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014 tcp_send_fin(sk);
2015 }
2016
2017 if (timeout) {
2018 struct task_struct *tsk = current;
2019 DEFINE_WAIT(wait);
2020
2021 do {
2022 prepare_to_wait(sk->sk_sleep, &wait,
2023 TASK_INTERRUPTIBLE);
2024 if (!closing(sk))
2025 break;
2026 release_sock(sk);
2027 timeout = schedule_timeout(timeout);
2028 lock_sock(sk);
2029 } while (!signal_pending(tsk) && timeout);
2030
2031 finish_wait(sk->sk_sleep, &wait);
2032 }
2033
2034adjudge_to_death:
2035
2036 release_sock(sk);
2037
2038
2039
2040
2041
2042 local_bh_disable();
2043 bh_lock_sock(sk);
2044 BUG_TRAP(!sock_owned_by_user(sk));
2045
2046 sock_hold(sk);
2047 sock_orphan(sk);
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063 if (sk->sk_state == TCP_FIN_WAIT2) {
2064 struct tcp_opt *tp = tcp_sk(sk);
2065 if (tp->linger2 < 0) {
2066 tcp_set_state(sk, TCP_CLOSE);
2067 tcp_send_active_reset(sk, GFP_ATOMIC);
2068 NET_INC_STATS_BH(TCPAbortOnLinger);
2069 } else {
2070 int tmo = tcp_fin_time(tp);
2071
2072 if (tmo > TCP_TIMEWAIT_LEN) {
2073 tcp_reset_keepalive_timer(sk, tcp_fin_time(tp));
2074 } else {
2075 atomic_inc(&tcp_orphan_count);
2076 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
2077 goto out;
2078 }
2079 }
2080 }
2081 if (sk->sk_state != TCP_CLOSE) {
2082 tcp_mem_reclaim(sk);
2083 if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
2084 (sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
2085 atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
2086 if (net_ratelimit())
2087 printk(KERN_INFO "TCP: too many of orphaned "
2088 "sockets\n");
2089 tcp_set_state(sk, TCP_CLOSE);
2090 tcp_send_active_reset(sk, GFP_ATOMIC);
2091 NET_INC_STATS_BH(TCPAbortOnMemory);
2092 }
2093 }
2094 atomic_inc(&tcp_orphan_count);
2095
2096 if (sk->sk_state == TCP_CLOSE)
2097 tcp_destroy_sock(sk);
2098
2099
2100out:
2101 bh_unlock_sock(sk);
2102 local_bh_enable();
2103 sock_put(sk);
2104}
2105
2106
2107
2108static inline int tcp_need_reset(int state)
2109{
2110 return (1 << state) &
2111 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2112 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2113}
2114
2115int tcp_disconnect(struct sock *sk, int flags)
2116{
2117 struct inet_opt *inet = inet_sk(sk);
2118 struct tcp_opt *tp = tcp_sk(sk);
2119 int err = 0;
2120 int old_state = sk->sk_state;
2121
2122 if (old_state != TCP_CLOSE)
2123 tcp_set_state(sk, TCP_CLOSE);
2124
2125
2126 if (old_state == TCP_LISTEN) {
2127 tcp_listen_stop(sk);
2128 } else if (tcp_need_reset(old_state) ||
2129 (tp->snd_nxt != tp->write_seq &&
2130 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
2131
2132
2133
2134 tcp_send_active_reset(sk, gfp_any());
2135 sk->sk_err = ECONNRESET;
2136 } else if (old_state == TCP_SYN_SENT)
2137 sk->sk_err = ECONNRESET;
2138
2139 tcp_clear_xmit_timers(sk);
2140 __skb_queue_purge(&sk->sk_receive_queue);
2141 tcp_writequeue_purge(sk);
2142 __skb_queue_purge(&tp->out_of_order_queue);
2143
2144 inet->dport = 0;
2145
2146 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2147 inet_reset_saddr(sk);
2148
2149 sk->sk_shutdown = 0;
2150 sock_reset_flag(sk, SOCK_DONE);
2151 tp->srtt = 0;
2152 if ((tp->write_seq += tp->max_window + 2) == 0)
2153 tp->write_seq = 1;
2154 tp->backoff = 0;
2155 tp->snd_cwnd = 2;
2156 tp->probes_out = 0;
2157 tp->packets_out = 0;
2158 tp->snd_ssthresh = 0x7fffffff;
2159 tp->snd_cwnd_cnt = 0;
2160 tp->ca_state = TCP_CA_Open;
2161 tcp_clear_retrans(tp);
2162 tcp_delack_init(tp);
2163 tp->send_head = NULL;
2164 tp->saw_tstamp = 0;
2165 tcp_sack_reset(tp);
2166 __sk_dst_reset(sk);
2167
2168 BUG_TRAP(!inet->num || tp->bind_hash);
2169
2170 sk->sk_error_report(sk);
2171 return err;
2172}
2173
2174
2175
2176
2177
2178static int wait_for_connect(struct sock *sk, long timeo)
2179{
2180 struct tcp_opt *tp = tcp_sk(sk);
2181 DEFINE_WAIT(wait);
2182 int err;
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198 for (;;) {
2199 prepare_to_wait_exclusive(sk->sk_sleep, &wait,
2200 TASK_INTERRUPTIBLE);
2201 release_sock(sk);
2202 if (!tp->accept_queue)
2203 timeo = schedule_timeout(timeo);
2204 lock_sock(sk);
2205 err = 0;
2206 if (tp->accept_queue)
2207 break;
2208 err = -EINVAL;
2209 if (sk->sk_state != TCP_LISTEN)
2210 break;
2211 err = sock_intr_errno(timeo);
2212 if (signal_pending(current))
2213 break;
2214 err = -EAGAIN;
2215 if (!timeo)
2216 break;
2217 }
2218 finish_wait(sk->sk_sleep, &wait);
2219 return err;
2220}
2221
2222
2223
2224
2225
2226struct sock *tcp_accept(struct sock *sk, int flags, int *err)
2227{
2228 struct tcp_opt *tp = tcp_sk(sk);
2229 struct open_request *req;
2230 struct sock *newsk;
2231 int error;
2232
2233 lock_sock(sk);
2234
2235
2236
2237
2238 error = -EINVAL;
2239 if (sk->sk_state != TCP_LISTEN)
2240 goto out;
2241
2242
2243 if (!tp->accept_queue) {
2244 long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
2245
2246
2247 error = -EAGAIN;
2248 if (!timeo)
2249 goto out;
2250
2251 error = wait_for_connect(sk, timeo);
2252 if (error)
2253 goto out;
2254 }
2255
2256 req = tp->accept_queue;
2257 if ((tp->accept_queue = req->dl_next) == NULL)
2258 tp->accept_queue_tail = NULL;
2259
2260 newsk = req->sk;
2261 tcp_acceptq_removed(sk);
2262 tcp_openreq_fastfree(req);
2263 BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
2264 release_sock(sk);
2265 return newsk;
2266
2267out:
2268 release_sock(sk);
2269 *err = error;
2270 return NULL;
2271}
2272
2273
2274
2275
2276int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval,
2277 int optlen)
2278{
2279 struct tcp_opt *tp = tcp_sk(sk);
2280 int val;
2281 int err = 0;
2282
2283 if (level != SOL_TCP)
2284 return tp->af_specific->setsockopt(sk, level, optname,
2285 optval, optlen);
2286
2287 if (optlen < sizeof(int))
2288 return -EINVAL;
2289
2290 if (get_user(val, (int *)optval))
2291 return -EFAULT;
2292
2293 lock_sock(sk);
2294
2295 switch (optname) {
2296 case TCP_MAXSEG:
2297
2298
2299
2300 if (val < 8 || val > MAX_TCP_WINDOW) {
2301 err = -EINVAL;
2302 break;
2303 }
2304 tp->user_mss = val;
2305 break;
2306
2307 case TCP_NODELAY:
2308 if (val) {
2309
2310
2311
2312
2313
2314
2315
2316
2317 tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
2318 tcp_push_pending_frames(sk, tp);
2319 } else {
2320 tp->nonagle &= ~TCP_NAGLE_OFF;
2321 }
2322 break;
2323
2324 case TCP_CORK:
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336 if (val) {
2337 tp->nonagle |= TCP_NAGLE_CORK;
2338 } else {
2339 tp->nonagle &= ~TCP_NAGLE_CORK;
2340 if (tp->nonagle&TCP_NAGLE_OFF)
2341 tp->nonagle |= TCP_NAGLE_PUSH;
2342 tcp_push_pending_frames(sk, tp);
2343 }
2344 break;
2345
2346 case TCP_KEEPIDLE:
2347 if (val < 1 || val > MAX_TCP_KEEPIDLE)
2348 err = -EINVAL;
2349 else {
2350 tp->keepalive_time = val * HZ;
2351 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2352 !((1 << sk->sk_state) &
2353 (TCPF_CLOSE | TCPF_LISTEN))) {
2354 __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
2355 if (tp->keepalive_time > elapsed)
2356 elapsed = tp->keepalive_time - elapsed;
2357 else
2358 elapsed = 0;
2359 tcp_reset_keepalive_timer(sk, elapsed);
2360 }
2361 }
2362 break;
2363 case TCP_KEEPINTVL:
2364 if (val < 1 || val > MAX_TCP_KEEPINTVL)
2365 err = -EINVAL;
2366 else
2367 tp->keepalive_intvl = val * HZ;
2368 break;
2369 case TCP_KEEPCNT:
2370 if (val < 1 || val > MAX_TCP_KEEPCNT)
2371 err = -EINVAL;
2372 else
2373 tp->keepalive_probes = val;
2374 break;
2375 case TCP_SYNCNT:
2376 if (val < 1 || val > MAX_TCP_SYNCNT)
2377 err = -EINVAL;
2378 else
2379 tp->syn_retries = val;
2380 break;
2381
2382 case TCP_LINGER2:
2383 if (val < 0)
2384 tp->linger2 = -1;
2385 else if (val > sysctl_tcp_fin_timeout / HZ)
2386 tp->linger2 = 0;
2387 else
2388 tp->linger2 = val * HZ;
2389 break;
2390
2391 case TCP_DEFER_ACCEPT:
2392 tp->defer_accept = 0;
2393 if (val > 0) {
2394
2395
2396 while (tp->defer_accept < 32 &&
2397 val > ((TCP_TIMEOUT_INIT / HZ) <<
2398 tp->defer_accept))
2399 tp->defer_accept++;
2400 tp->defer_accept++;
2401 }
2402 break;
2403
2404 case TCP_WINDOW_CLAMP:
2405 if (!val) {
2406 if (sk->sk_state != TCP_CLOSE) {
2407 err = -EINVAL;
2408 break;
2409 }
2410 tp->window_clamp = 0;
2411 } else
2412 tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
2413 SOCK_MIN_RCVBUF / 2 : val;
2414 break;
2415
2416 case TCP_QUICKACK:
2417 if (!val) {
2418 tp->ack.pingpong = 1;
2419 } else {
2420 tp->ack.pingpong = 0;
2421 if ((1 << sk->sk_state) &
2422 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
2423 tcp_ack_scheduled(tp)) {
2424 tp->ack.pending |= TCP_ACK_PUSHED;
2425 cleanup_rbuf(sk, 1);
2426 if (!(val & 1))
2427 tp->ack.pingpong = 1;
2428 }
2429 }
2430 break;
2431
2432 default:
2433 err = -ENOPROTOOPT;
2434 break;
2435 };
2436 release_sock(sk);
2437 return err;
2438}
2439
2440int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval,
2441 int *optlen)
2442{
2443 struct tcp_opt *tp = tcp_sk(sk);
2444 int val, len;
2445
2446 if (level != SOL_TCP)
2447 return tp->af_specific->getsockopt(sk, level, optname,
2448 optval, optlen);
2449
2450 if (get_user(len, optlen))
2451 return -EFAULT;
2452
2453 len = min_t(unsigned int, len, sizeof(int));
2454
2455 if (len < 0)
2456 return -EINVAL;
2457
2458 switch (optname) {
2459 case TCP_MAXSEG:
2460 val = tp->mss_cache_std;
2461 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2462 val = tp->user_mss;
2463 break;
2464 case TCP_NODELAY:
2465 val = !!(tp->nonagle&TCP_NAGLE_OFF);
2466 break;
2467 case TCP_CORK:
2468 val = !!(tp->nonagle&TCP_NAGLE_CORK);
2469 break;
2470 case TCP_KEEPIDLE:
2471 val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
2472 break;
2473 case TCP_KEEPINTVL:
2474 val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ;
2475 break;
2476 case TCP_KEEPCNT:
2477 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
2478 break;
2479 case TCP_SYNCNT:
2480 val = tp->syn_retries ? : sysctl_tcp_syn_retries;
2481 break;
2482 case TCP_LINGER2:
2483 val = tp->linger2;
2484 if (val >= 0)
2485 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2486 break;
2487 case TCP_DEFER_ACCEPT:
2488 val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) <<
2489 (tp->defer_accept - 1));
2490 break;
2491 case TCP_WINDOW_CLAMP:
2492 val = tp->window_clamp;
2493 break;
2494 case TCP_INFO: {
2495 struct tcp_info info;
2496 u32 now = tcp_time_stamp;
2497
2498 if (get_user(len, optlen))
2499 return -EFAULT;
2500 info.tcpi_state = sk->sk_state;
2501 info.tcpi_ca_state = tp->ca_state;
2502 info.tcpi_retransmits = tp->retransmits;
2503 info.tcpi_probes = tp->probes_out;
2504 info.tcpi_backoff = tp->backoff;
2505 info.tcpi_options = 0;
2506 if (tp->tstamp_ok)
2507 info.tcpi_options |= TCPI_OPT_TIMESTAMPS;
2508 if (tp->sack_ok)
2509 info.tcpi_options |= TCPI_OPT_SACK;
2510 if (tp->wscale_ok) {
2511 info.tcpi_options |= TCPI_OPT_WSCALE;
2512 info.tcpi_snd_wscale = tp->snd_wscale;
2513 info.tcpi_rcv_wscale = tp->rcv_wscale;
2514 } else {
2515 info.tcpi_snd_wscale = 0;
2516 info.tcpi_rcv_wscale = 0;
2517 }
2518 if (tp->ecn_flags & TCP_ECN_OK)
2519 info.tcpi_options |= TCPI_OPT_ECN;
2520
2521 info.tcpi_rto = (1000000 * tp->rto) / HZ;
2522 info.tcpi_ato = (1000000 * tp->ack.ato) / HZ;
2523 info.tcpi_snd_mss = tp->mss_cache_std;
2524 info.tcpi_rcv_mss = tp->ack.rcv_mss;
2525
2526 info.tcpi_unacked = tp->packets_out;
2527 info.tcpi_sacked = tp->sacked_out;
2528 info.tcpi_lost = tp->lost_out;
2529 info.tcpi_retrans = tp->retrans_out;
2530 info.tcpi_fackets = tp->fackets_out;
2531
2532 info.tcpi_last_data_sent = ((now - tp->lsndtime) * 1000) / HZ;
2533 info.tcpi_last_ack_sent = 0;
2534 info.tcpi_last_data_recv = ((now -
2535 tp->ack.lrcvtime) * 1000) / HZ;
2536 info.tcpi_last_ack_recv = ((now - tp->rcv_tstamp) * 1000) / HZ;
2537
2538 info.tcpi_pmtu = tp->pmtu_cookie;
2539 info.tcpi_rcv_ssthresh = tp->rcv_ssthresh;
2540 info.tcpi_rtt = ((1000000 * tp->srtt) / HZ) >> 3;
2541 info.tcpi_rttvar = ((1000000 * tp->mdev) / HZ) >> 2;
2542 info.tcpi_snd_ssthresh = tp->snd_ssthresh;
2543 info.tcpi_snd_cwnd = tp->snd_cwnd;
2544 info.tcpi_advmss = tp->advmss;
2545 info.tcpi_reordering = tp->reordering;
2546
2547 len = min_t(unsigned int, len, sizeof(info));
2548 if (put_user(len, optlen))
2549 return -EFAULT;
2550 if (copy_to_user(optval, &info, len))
2551 return -EFAULT;
2552 return 0;
2553 }
2554 case TCP_QUICKACK:
2555 val = !tp->ack.pingpong;
2556 break;
2557 default:
2558 return -ENOPROTOOPT;
2559 };
2560
2561 if (put_user(len, optlen))
2562 return -EFAULT;
2563 if (copy_to_user(optval, &val, len))
2564 return -EFAULT;
2565 return 0;
2566}
2567
2568
/* Called from tcp_init(); defined outside this file. */
extern void tcpdiag_init(void);

/*
 * Referenced by tcp_init() only when struct tcp_skb_cb is larger than
 * skb->cb.
 * NOTE(review): presumably left undefined on purpose so that such a size
 * mismatch is caught when linking - confirm.
 */
extern void __skb_cb_too_small_for_tcp(int, int);
2571
2572void __init tcp_init(void)
2573{
2574 struct sk_buff *skb = NULL;
2575 unsigned long goal;
2576 int order, i;
2577
2578 if (sizeof(struct tcp_skb_cb) > sizeof(skb->cb))
2579 __skb_cb_too_small_for_tcp(sizeof(struct tcp_skb_cb),
2580 sizeof(skb->cb));
2581
2582 tcp_openreq_cachep = kmem_cache_create("tcp_open_request",
2583 sizeof(struct open_request),
2584 0, SLAB_HWCACHE_ALIGN,
2585 NULL, NULL);
2586 if (!tcp_openreq_cachep)
2587 panic("tcp_init: Cannot alloc open_request cache.");
2588
2589 tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket",
2590 sizeof(struct tcp_bind_bucket),
2591 0, SLAB_HWCACHE_ALIGN,
2592 NULL, NULL);
2593 if (!tcp_bucket_cachep)
2594 panic("tcp_init: Cannot alloc tcp_bind_bucket cache.");
2595
2596 tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket",
2597 sizeof(struct tcp_tw_bucket),
2598 0, SLAB_HWCACHE_ALIGN,
2599 NULL, NULL);
2600 if (!tcp_timewait_cachep)
2601 panic("tcp_init: Cannot alloc tcp_tw_bucket cache.");
2602
2603
2604
2605
2606
2607
2608 if (num_physpages >= (128 * 1024))
2609 goal = num_physpages >> (21 - PAGE_SHIFT);
2610 else
2611 goal = num_physpages >> (23 - PAGE_SHIFT);
2612
2613 for (order = 0; (1UL << order) < goal; order++)
2614 ;
2615 do {
2616 tcp_ehash_size = (1UL << order) * PAGE_SIZE /
2617 sizeof(struct tcp_ehash_bucket);
2618 tcp_ehash_size >>= 1;
2619 while (tcp_ehash_size & (tcp_ehash_size - 1))
2620 tcp_ehash_size--;
2621 tcp_ehash = (struct tcp_ehash_bucket *)
2622 __get_free_pages(GFP_ATOMIC, order);
2623 } while (!tcp_ehash && --order > 0);
2624
2625 if (!tcp_ehash)
2626 panic("Failed to allocate TCP established hash table\n");
2627 for (i = 0; i < (tcp_ehash_size << 1); i++) {
2628 tcp_ehash[i].lock = RW_LOCK_UNLOCKED;
2629 INIT_HLIST_HEAD(&tcp_ehash[i].chain);
2630 }
2631
2632 do {
2633 tcp_bhash_size = (1UL << order) * PAGE_SIZE /
2634 sizeof(struct tcp_bind_hashbucket);
2635 if ((tcp_bhash_size > (64 * 1024)) && order > 0)
2636 continue;
2637 tcp_bhash = (struct tcp_bind_hashbucket *)
2638 __get_free_pages(GFP_ATOMIC, order);
2639 } while (!tcp_bhash && --order >= 0);
2640
2641 if (!tcp_bhash)
2642 panic("Failed to allocate TCP bind hash table\n");
2643 for (i = 0; i < tcp_bhash_size; i++) {
2644 tcp_bhash[i].lock = SPIN_LOCK_UNLOCKED;
2645 INIT_HLIST_HEAD(&tcp_bhash[i].chain);
2646 }
2647
2648
2649
2650
2651 if (order > 4) {
2652 sysctl_local_port_range[0] = 32768;
2653 sysctl_local_port_range[1] = 61000;
2654 sysctl_tcp_max_tw_buckets = 180000;
2655 sysctl_tcp_max_orphans = 4096 << (order - 4);
2656 sysctl_max_syn_backlog = 1024;
2657 } else if (order < 3) {
2658 sysctl_local_port_range[0] = 1024 * (3 - order);
2659 sysctl_tcp_max_tw_buckets >>= (3 - order);
2660 sysctl_tcp_max_orphans >>= (3 - order);
2661 sysctl_max_syn_backlog = 128;
2662 }
2663 tcp_port_rover = sysctl_local_port_range[0] - 1;
2664
2665 sysctl_tcp_mem[0] = 768 << order;
2666 sysctl_tcp_mem[1] = 1024 << order;
2667 sysctl_tcp_mem[2] = 1536 << order;
2668 if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 512)
2669 sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 512;
2670 if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 512)
2671 sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 512;
2672
2673 if (order < 3) {
2674 sysctl_tcp_wmem[2] = 64 * 1024;
2675 sysctl_tcp_rmem[0] = PAGE_SIZE;
2676 sysctl_tcp_rmem[1] = 43689;
2677 sysctl_tcp_rmem[2] = 2 * 43689;
2678 }
2679
2680 printk(KERN_INFO "TCP: Hash tables configured "
2681 "(established %d bind %d)\n",
2682 tcp_ehash_size << 1, tcp_bhash_size);
2683
2684 tcpdiag_init();
2685}
2686
/* Exported socket operations. */
EXPORT_SYMBOL(tcp_accept);
EXPORT_SYMBOL(tcp_close);
EXPORT_SYMBOL(tcp_disconnect);
EXPORT_SYMBOL(tcp_getsockopt);
EXPORT_SYMBOL(tcp_ioctl);
EXPORT_SYMBOL(tcp_poll);
EXPORT_SYMBOL(tcp_read_sock);
EXPORT_SYMBOL(tcp_recvmsg);
EXPORT_SYMBOL(tcp_sendmsg);
EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown);

/*
 * Other exported functions.
 * NOTE(review): tcp_close_state() is declared static in this file yet
 * exported here - confirm this is intended.
 */
EXPORT_SYMBOL(tcp_close_state);
EXPORT_SYMBOL(tcp_destroy_sock);
EXPORT_SYMBOL(tcp_write_space);
EXPORT_SYMBOL(__tcp_mem_reclaim);

/* Exported data: sysctl limits, slab caches, counters and statistics. */
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
EXPORT_SYMBOL(tcp_openreq_cachep);
EXPORT_SYMBOL(tcp_timewait_cachep);
EXPORT_SYMBOL(tcp_sockets_allocated);
EXPORT_SYMBOL(tcp_statistics);
2709