1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248#include <linux/kernel.h>
249#include <linux/module.h>
250#include <linux/types.h>
251#include <linux/fcntl.h>
252#include <linux/poll.h>
253#include <linux/init.h>
254#include <linux/fs.h>
255#include <linux/skbuff.h>
256#include <linux/scatterlist.h>
257#include <linux/splice.h>
258#include <linux/net.h>
259#include <linux/socket.h>
260#include <linux/random.h>
261#include <linux/bootmem.h>
262#include <linux/highmem.h>
263#include <linux/swap.h>
264#include <linux/cache.h>
265#include <linux/err.h>
266#include <linux/crypto.h>
267#include <linux/time.h>
268#include <linux/slab.h>
269
270#include <net/icmp.h>
271#include <net/tcp.h>
272#include <net/xfrm.h>
273#include <net/ip.h>
274#include <net/netdma.h>
275#include <net/sock.h>
276
277#include <asm/uaccess.h>
278#include <asm/ioctls.h>
279
/* Tunable that starts out at the compile-time default TCP_FIN_TIMEOUT. */
280int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
281
/* Per-cpu counter exported (GPL-only) to other modules.
 * NOTE(review): presumably counts orphaned TCP sockets - confirm at users.
 */
282struct percpu_counter tcp_orphan_count;
283EXPORT_SYMBOL_GPL(tcp_orphan_count);
284
/* Three-element tunables for overall TCP memory and for the per-socket
 * write/read buffers.
 * NOTE(review): the meaning of each of the three slots is not visible in
 * this file section - confirm against the sysctl documentation.
 */
285long sysctl_tcp_mem[3] __read_mostly;
286int sysctl_tcp_wmem[3] __read_mostly;
287int sysctl_tcp_rmem[3] __read_mostly;
288
289EXPORT_SYMBOL(sysctl_tcp_mem);
290EXPORT_SYMBOL(sysctl_tcp_rmem);
291EXPORT_SYMBOL(sysctl_tcp_wmem);
292
/* Global atomic counter of memory allocated to TCP.
 * NOTE(review): unit (pages vs. bytes) is not visible here - confirm.
 */
293atomic_long_t tcp_memory_allocated;
294EXPORT_SYMBOL(tcp_memory_allocated);
295
296
297
298
/* Per-cpu counter, exported to modules.
 * NOTE(review): presumably the number of allocated TCP sockets - confirm.
 */
299struct percpu_counter tcp_sockets_allocated;
300EXPORT_SYMBOL(tcp_sockets_allocated);
301
302
303
304
/*
 * State handed from tcp_splice_read() to tcp_splice_data_recv() through
 * read_descriptor_t.arg.data.
 */
305struct tcp_splice_state {
 /* destination pipe passed on to skb_splice_bits() */
306 struct pipe_inode_info *pipe;
 /* bytes still to be spliced; decremented by tcp_splice_read() */
307 size_t len;
 /* splice flags passed on to skb_splice_bits() */
308 unsigned int flags;
309};
310
311
312
313
314
315
316
/* Global flag: non-zero once tcp_enter_memory_pressure() has been called.
 * Nothing in this part of the file clears it again.
 */
317int tcp_memory_pressure __read_mostly;
318EXPORT_SYMBOL(tcp_memory_pressure);
319
320void tcp_enter_memory_pressure(struct sock *sk)
321{
322 if (!tcp_memory_pressure) {
323 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
324 tcp_memory_pressure = 1;
325 }
326}
327EXPORT_SYMBOL(tcp_enter_memory_pressure);
328
329
/*
 * Convert a duration in seconds into a retransmission count.
 *
 * The first retransmission covers @timeout seconds; every following one
 * covers twice the previous interval, capped at @rto_max.  The result is
 * the smallest count whose accumulated intervals are >= @seconds,
 * saturated at 255.  A non-positive @seconds yields 0.
 */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
{
	u8 count;
	int elapsed;

	if (seconds <= 0)
		return 0;

	elapsed = timeout;
	for (count = 1; seconds > elapsed && count < 255; count++) {
		/* exponential backoff, bounded by rto_max */
		timeout <<= 1;
		if (timeout > rto_max)
			timeout = rto_max;
		elapsed += timeout;
	}

	return count;
}
348
349
/*
 * Convert a retransmission count into the number of seconds it spans.
 *
 * Inverse of secs_to_retrans(): the first retransmission contributes
 * @timeout, each further one contributes the previous interval doubled
 * and capped at @rto_max.  A count of 0 yields 0.
 */
static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
{
	int total;

	if (!retrans)
		return 0;

	total = timeout;
	for (retrans--; retrans; retrans--) {
		/* exponential backoff, bounded by rto_max */
		timeout <<= 1;
		if (timeout > rto_max)
			timeout = rto_max;
		total += timeout;
	}

	return total;
}
365
366
367
368
369
370
371
372
373unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
374{
375 unsigned int mask;
376 struct sock *sk = sock->sk;
377 const struct tcp_sock *tp = tcp_sk(sk);
378
379 sock_poll_wait(file, sk_sleep(sk), wait);
380 if (sk->sk_state == TCP_LISTEN)
381 return inet_csk_listen_poll(sk);
382
383
384
385
386
387
388 mask = 0;
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
418 mask |= POLLHUP;
419 if (sk->sk_shutdown & RCV_SHUTDOWN)
420 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
421
422
423 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
424 int target = sock_rcvlowat(sk, 0, INT_MAX);
425
426 if (tp->urg_seq == tp->copied_seq &&
427 !sock_flag(sk, SOCK_URGINLINE) &&
428 tp->urg_data)
429 target++;
430
431
432
433
434 if (tp->rcv_nxt - tp->copied_seq >= target)
435 mask |= POLLIN | POLLRDNORM;
436
437 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
438 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
439 mask |= POLLOUT | POLLWRNORM;
440 } else {
441 set_bit(SOCK_ASYNC_NOSPACE,
442 &sk->sk_socket->flags);
443 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
444
445
446
447
448
449 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
450 mask |= POLLOUT | POLLWRNORM;
451 }
452 } else
453 mask |= POLLOUT | POLLWRNORM;
454
455 if (tp->urg_data & TCP_URG_VALID)
456 mask |= POLLPRI;
457 }
458
459 smp_rmb();
460 if (sk->sk_err)
461 mask |= POLLERR;
462
463 return mask;
464}
465EXPORT_SYMBOL(tcp_poll);
466
467int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
468{
469 struct tcp_sock *tp = tcp_sk(sk);
470 int answ;
471
472 switch (cmd) {
473 case SIOCINQ:
474 if (sk->sk_state == TCP_LISTEN)
475 return -EINVAL;
476
477 lock_sock(sk);
478 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
479 answ = 0;
480 else if (sock_flag(sk, SOCK_URGINLINE) ||
481 !tp->urg_data ||
482 before(tp->urg_seq, tp->copied_seq) ||
483 !before(tp->urg_seq, tp->rcv_nxt)) {
484 struct sk_buff *skb;
485
486 answ = tp->rcv_nxt - tp->copied_seq;
487
488
489 skb = skb_peek_tail(&sk->sk_receive_queue);
490 if (answ && skb)
491 answ -= tcp_hdr(skb)->fin;
492 } else
493 answ = tp->urg_seq - tp->copied_seq;
494 release_sock(sk);
495 break;
496 case SIOCATMARK:
497 answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
498 break;
499 case SIOCOUTQ:
500 if (sk->sk_state == TCP_LISTEN)
501 return -EINVAL;
502
503 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
504 answ = 0;
505 else
506 answ = tp->write_seq - tp->snd_una;
507 break;
508 case SIOCOUTQNSD:
509 if (sk->sk_state == TCP_LISTEN)
510 return -EINVAL;
511
512 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
513 answ = 0;
514 else
515 answ = tp->write_seq - tp->snd_nxt;
516 break;
517 default:
518 return -ENOIOCTLCMD;
519 }
520
521 return put_user(answ, (int __user *)arg);
522}
523EXPORT_SYMBOL(tcp_ioctl);
524
525static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
526{
527 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
528 tp->pushed_seq = tp->write_seq;
529}
530
531static inline int forced_push(const struct tcp_sock *tp)
532{
533 return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
534}
535
536static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
537{
538 struct tcp_sock *tp = tcp_sk(sk);
539 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
540
541 skb->csum = 0;
542 tcb->seq = tcb->end_seq = tp->write_seq;
543 tcb->tcp_flags = TCPHDR_ACK;
544 tcb->sacked = 0;
545 skb_header_release(skb);
546 tcp_add_write_queue_tail(sk, skb);
547 sk->sk_wmem_queued += skb->truesize;
548 sk_mem_charge(sk, skb->truesize);
549 if (tp->nonagle & TCP_NAGLE_PUSH)
550 tp->nonagle &= ~TCP_NAGLE_PUSH;
551}
552
553static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
554{
555 if (flags & MSG_OOB)
556 tp->snd_up = tp->write_seq;
557}
558
559static inline void tcp_push(struct sock *sk, int flags, int mss_now,
560 int nonagle)
561{
562 if (tcp_send_head(sk)) {
563 struct tcp_sock *tp = tcp_sk(sk);
564
565 if (!(flags & MSG_MORE) || forced_push(tp))
566 tcp_mark_push(tp, tcp_write_queue_tail(sk));
567
568 tcp_mark_urg(tp, flags);
569 __tcp_push_pending_frames(sk, mss_now,
570 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
571 }
572}
573
574static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
575 unsigned int offset, size_t len)
576{
577 struct tcp_splice_state *tss = rd_desc->arg.data;
578 int ret;
579
580 ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
581 tss->flags);
582 if (ret > 0)
583 rd_desc->count -= ret;
584 return ret;
585}
586
587static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
588{
589
590 read_descriptor_t rd_desc = {
591 .arg.data = tss,
592 .count = tss->len,
593 };
594
595 return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
596}
597
598
599
600
601
602
603
604
605
606
607
608
609
610ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
611 struct pipe_inode_info *pipe, size_t len,
612 unsigned int flags)
613{
614 struct sock *sk = sock->sk;
615 struct tcp_splice_state tss = {
616 .pipe = pipe,
617 .len = len,
618 .flags = flags,
619 };
620 long timeo;
621 ssize_t spliced;
622 int ret;
623
624 sock_rps_record_flow(sk);
625
626
627
628 if (unlikely(*ppos))
629 return -ESPIPE;
630
631 ret = spliced = 0;
632
633 lock_sock(sk);
634
635 timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
636 while (tss.len) {
637 ret = __tcp_splice_read(sk, &tss);
638 if (ret < 0)
639 break;
640 else if (!ret) {
641 if (spliced)
642 break;
643 if (sock_flag(sk, SOCK_DONE))
644 break;
645 if (sk->sk_err) {
646 ret = sock_error(sk);
647 break;
648 }
649 if (sk->sk_shutdown & RCV_SHUTDOWN)
650 break;
651 if (sk->sk_state == TCP_CLOSE) {
652
653
654
655
656 if (!sock_flag(sk, SOCK_DONE))
657 ret = -ENOTCONN;
658 break;
659 }
660 if (!timeo) {
661 ret = -EAGAIN;
662 break;
663 }
664 sk_wait_data(sk, &timeo);
665 if (signal_pending(current)) {
666 ret = sock_intr_errno(timeo);
667 break;
668 }
669 continue;
670 }
671 tss.len -= ret;
672 spliced += ret;
673
674 if (!timeo)
675 break;
676 release_sock(sk);
677 lock_sock(sk);
678
679 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
680 (sk->sk_shutdown & RCV_SHUTDOWN) ||
681 signal_pending(current))
682 break;
683 }
684
685 release_sock(sk);
686
687 if (spliced)
688 return spliced;
689
690 return ret;
691}
692EXPORT_SYMBOL(tcp_splice_read);
693
694struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
695{
696 struct sk_buff *skb;
697
698
699 size = ALIGN(size, 4);
700
701 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
702 if (skb) {
703 if (sk_wmem_schedule(sk, skb->truesize)) {
704
705
706
707
708 skb_reserve(skb, skb_tailroom(skb) - size);
709 return skb;
710 }
711 __kfree_skb(skb);
712 } else {
713 sk->sk_prot->enter_memory_pressure(sk);
714 sk_stream_moderate_sndbuf(sk);
715 }
716 return NULL;
717}
718
719static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
720 int large_allowed)
721{
722 struct tcp_sock *tp = tcp_sk(sk);
723 u32 xmit_size_goal, old_size_goal;
724
725 xmit_size_goal = mss_now;
726
727 if (large_allowed && sk_can_gso(sk)) {
728 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
729 inet_csk(sk)->icsk_af_ops->net_header_len -
730 inet_csk(sk)->icsk_ext_hdr_len -
731 tp->tcp_header_len);
732
733 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
734
735
736 old_size_goal = tp->xmit_size_goal_segs * mss_now;
737
738 if (likely(old_size_goal <= xmit_size_goal &&
739 old_size_goal + mss_now > xmit_size_goal)) {
740 xmit_size_goal = old_size_goal;
741 } else {
742 tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
743 xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
744 }
745 }
746
747 return max(xmit_size_goal, mss_now);
748}
749
750static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
751{
752 int mss_now;
753
754 mss_now = tcp_current_mss(sk);
755 *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
756
757 return mss_now;
758}
759
/*
 * Queue the byte range [poffset, poffset + psize) of the page array @pages
 * on the socket's write queue.  The pages are attached to skbs as page
 * fragments (a reference is taken with get_page()); no data is copied.
 *
 * Waits for the connection to be established when the state is neither
 * ESTABLISHED nor CLOSE_WAIT, and for send-buffer space / memory when
 * needed, using the timeout from sock_sndtimeo(sk, flags & MSG_DONTWAIT).
 *
 * Returns the number of bytes queued when that is non-zero, otherwise the
 * result of sk_stream_error(sk, flags, err).
 *
 * No socket lock is taken here; the caller visible in this file,
 * tcp_sendpage(), holds lock_sock() around the call.
 */
760static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
761 size_t psize, int flags)
762{
763 struct tcp_sock *tp = tcp_sk(sk);
764 int mss_now, size_goal;
765 int err;
766 ssize_t copied;
767 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
768
769
 /* Wait for the connection unless already ESTABLISHED or CLOSE_WAIT. */
770 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
771 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
772 goto out_err;
773
774 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
775
776 mss_now = tcp_send_mss(sk, &size_goal, flags);
777 copied = 0;
778
 /* A pending socket error or a send-side shutdown fails with -EPIPE. */
779 err = -EPIPE;
780 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
781 goto out_err;
782
783 while (psize > 0) {
784 struct sk_buff *skb = tcp_write_queue_tail(sk);
785 struct page *page = pages[poffset / PAGE_SIZE];
786 int copy, i, can_coalesce;
787 int offset = poffset % PAGE_SIZE;
 /* never cross a page boundary in one iteration */
788 int size = min_t(size_t, psize, PAGE_SIZE - offset);
789
 /* Start a new skb when nothing is unsent or the tail skb has
  * already reached size_goal.
  */
790 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
791new_segment:
792 if (!sk_stream_memory_free(sk))
793 goto wait_for_sndbuf;
794
 /* zero linear payload: the data lives in page fragments only */
795 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
796 if (!skb)
797 goto wait_for_memory;
798
799 skb_entail(sk, skb);
800 copy = size_goal;
801 }
802
803 if (copy > size)
804 copy = size;
805
 /* Either extend the last fragment or take a new fragment slot;
  * when all slots are used, push this skb and start another.
  */
806 i = skb_shinfo(skb)->nr_frags;
807 can_coalesce = skb_can_coalesce(skb, i, page, offset);
808 if (!can_coalesce && i >= MAX_SKB_FRAGS) {
809 tcp_mark_push(tp, skb);
810 goto new_segment;
811 }
812 if (!sk_wmem_schedule(sk, copy))
813 goto wait_for_memory;
814
815 if (can_coalesce) {
816 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
817 } else {
818 get_page(page);
819 skb_fill_page_desc(skb, i, page, offset, copy);
820 }
821
 /* Account the added bytes on the skb, the socket and the sequence space. */
822 skb->len += copy;
823 skb->data_len += copy;
824 skb->truesize += copy;
825 sk->sk_wmem_queued += copy;
826 sk_mem_charge(sk, copy);
827 skb->ip_summed = CHECKSUM_PARTIAL;
828 tp->write_seq += copy;
829 TCP_SKB_CB(skb)->end_seq += copy;
830 skb_shinfo(skb)->gso_segs = 0;
831
 /* On the first chunk of this call, clear PSH on the skb being extended. */
832 if (!copied)
833 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
834
835 copied += copy;
836 poffset += copy;
837 if (!(psize -= copy))
838 goto out;
839
 /* Keep filling this skb until it reaches size_goal (always for MSG_OOB). */
840 if (skb->len < size_goal || (flags & MSG_OOB))
841 continue;
842
 /* skb is full: push everything if a push is due, or send this skb
  * right away when it is the only unsent one.
  */
843 if (forced_push(tp)) {
844 tcp_mark_push(tp, skb);
845 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
846 } else if (skb == tcp_send_head(sk))
847 tcp_push_one(sk, mss_now);
848 continue;
849
850wait_for_sndbuf:
851 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
852wait_for_memory:
 /* Flush what has been queued so far before sleeping. */
853 if (copied)
854 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
855
856 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
857 goto do_error;
858
 /* MSS and size goal are recomputed after the wait. */
859 mss_now = tcp_send_mss(sk, &size_goal, flags);
860 }
861
862out:
863 if (copied)
864 tcp_push(sk, flags, mss_now, tp->nonagle);
865 return copied;
866
867do_error:
 /* A partial send is reported as success with the byte count. */
868 if (copied)
869 goto out;
870out_err:
871 return sk_stream_error(sk, flags, err);
872}
873
874int tcp_sendpage(struct sock *sk, struct page *page, int offset,
875 size_t size, int flags)
876{
877 ssize_t res;
878
879 if (!(sk->sk_route_caps & NETIF_F_SG) ||
880 !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
881 return sock_no_sendpage(sk->sk_socket, page, offset, size,
882 flags);
883
884 lock_sock(sk);
885 res = do_tcp_sendpages(sk, &page, offset, size, flags);
886 release_sock(sk);
887 return res;
888}
889EXPORT_SYMBOL(tcp_sendpage);
890
/* Per-socket page cached by tcp_sendmsg() for page-fragment copies, and
 * the offset of the first unused byte within that page.
 */
891#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
892#define TCP_OFF(sk) (sk->sk_sndmsg_off)
893
894static inline int select_size(const struct sock *sk, int sg)
895{
896 const struct tcp_sock *tp = tcp_sk(sk);
897 int tmp = tp->mss_cache;
898
899 if (sg) {
900 if (sk_can_gso(sk))
901 tmp = 0;
902 else {
903 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
904
905 if (tmp >= pgbreak &&
906 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
907 tmp = pgbreak;
908 }
909 }
910
911 return tmp;
912}
913
/*
 * sendmsg() implementation for TCP: copy the user data described by
 * msg->msg_iov onto the socket's write queue and push it out.
 *
 * The socket lock is held for the whole call.  Data is copied either into
 * the linear tailroom of the tail skb or into page fragments backed by the
 * per-socket cached page (TCP_PAGE/TCP_OFF).  The function sleeps for
 * connection establishment, send-buffer space and memory, bounded by the
 * timeout from sock_sndtimeo(sk, flags & MSG_DONTWAIT).
 *
 * Returns the number of bytes queued when that is non-zero, otherwise the
 * result of sk_stream_error(sk, flags, err).
 *
 * NOTE(review): @iocb and @size are not referenced in the body.
 */
914int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
915 size_t size)
916{
917 struct iovec *iov;
918 struct tcp_sock *tp = tcp_sk(sk);
919 struct sk_buff *skb;
920 int iovlen, flags;
921 int mss_now, size_goal;
922 int sg, err, copied;
923 long timeo;
924
925 lock_sock(sk);
926
927 flags = msg->msg_flags;
928 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
929
930
 /* Wait for the connection unless already ESTABLISHED or CLOSE_WAIT. */
931 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
932 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
933 goto out_err;
934
935
936 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
937
938 mss_now = tcp_send_mss(sk, &size_goal, flags);
939
940
941 iovlen = msg->msg_iovlen;
942 iov = msg->msg_iov;
943 copied = 0;
944
 /* A pending socket error or a send-side shutdown fails with -EPIPE. */
945 err = -EPIPE;
946 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
947 goto out_err;
948
 /* non-zero when the route supports scatter-gather */
949 sg = sk->sk_route_caps & NETIF_F_SG;
950
 /* Outer loop: one iteration per iovec entry. */
951 while (--iovlen >= 0) {
952 size_t seglen = iov->iov_len;
953 unsigned char __user *from = iov->iov_base;
954
955 iov++;
956
 /* Inner loop: until this iovec entry is fully queued. */
957 while (seglen > 0) {
958 int copy = 0;
959 int max = size_goal;
960
 /* Room left in the tail skb, if there is unsent data at all.
  * skbs without checksum offload are limited to one MSS.
  */
961 skb = tcp_write_queue_tail(sk);
962 if (tcp_send_head(sk)) {
963 if (skb->ip_summed == CHECKSUM_NONE)
964 max = mss_now;
965 copy = max - skb->len;
966 }
967
968 if (copy <= 0) {
969new_segment:
970
971
972
 /* Tail skb is full (or absent): allocate a new segment. */
973 if (!sk_stream_memory_free(sk))
974 goto wait_for_sndbuf;
975
976 skb = sk_stream_alloc_skb(sk,
977 select_size(sk, sg),
978 sk->sk_allocation);
979 if (!skb)
980 goto wait_for_memory;
981
982
983
984
 /* Use checksum offload when the route provides it. */
985 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
986 skb->ip_summed = CHECKSUM_PARTIAL;
987
988 skb_entail(sk, skb);
989 copy = size_goal;
990 max = size_goal;
991 }
992
993
 /* Never copy more than what is left in this iovec entry. */
994 if (copy > seglen)
995 copy = seglen;
996
997
 /* Destination 1: linear tailroom of the skb. */
998 if (skb_tailroom(skb) > 0) {
999
1000 if (copy > skb_tailroom(skb))
1001 copy = skb_tailroom(skb);
1002 err = skb_add_data_nocache(sk, skb, from, copy);
1003 if (err)
1004 goto do_fault;
1005 } else {
 /* Destination 2: a page fragment, using the socket's cached page. */
1006 int merge = 0;
1007 int i = skb_shinfo(skb)->nr_frags;
1008 struct page *page = TCP_PAGE(sk);
1009 int off = TCP_OFF(sk);
1010
1011 if (skb_can_coalesce(skb, i, page, off) &&
1012 off != PAGE_SIZE) {
1013
1014
 /* the last fragment can simply be extended */
1015 merge = 1;
1016 } else if (i == MAX_SKB_FRAGS || !sg) {
1017
1018
1019
1020
 /* No new fragment possible (all slots used, or no
  * scatter-gather): push this skb, start a new one.
  */
1021 tcp_mark_push(tp, skb);
1022 goto new_segment;
1023 } else if (page) {
 /* cached page is used up: drop our reference to it */
1024 if (off == PAGE_SIZE) {
1025 put_page(page);
1026 TCP_PAGE(sk) = page = NULL;
1027 off = 0;
1028 }
1029 } else
1030 off = 0;
1031
1032 if (copy > PAGE_SIZE - off)
1033 copy = PAGE_SIZE - off;
1034
1035 if (!sk_wmem_schedule(sk, copy))
1036 goto wait_for_memory;
1037
1038 if (!page) {
1039
 /* no cached page: allocate a fresh one */
1040 if (!(page = sk_stream_alloc_page(sk)))
1041 goto wait_for_memory;
1042 }
1043
1044
1045
1046 err = skb_copy_to_page_nocache(sk, from, skb,
1047 page, off, copy);
1048 if (err) {
1049
1050
1051
 /* A freshly allocated page is not yet recorded anywhere;
  * store it on the socket (presumably so it is not leaked).
  */
1052 if (!TCP_PAGE(sk)) {
1053 TCP_PAGE(sk) = page;
1054 TCP_OFF(sk) = 0;
1055 }
1056 goto do_error;
1057 }
1058
1059
 /* Attach the copied bytes to the skb. */
1060 if (merge) {
1061 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1062 } else {
1063 skb_fill_page_desc(skb, i, page, off, copy);
 /* Take an extra page reference when the page is (or becomes)
  * the socket's cached page; a new page that is now completely
  * full is not cached.
  */
1064 if (TCP_PAGE(sk)) {
1065 get_page(page);
1066 } else if (off + copy < PAGE_SIZE) {
1067 get_page(page);
1068 TCP_PAGE(sk) = page;
1069 }
1070 }
1071
1072 TCP_OFF(sk) = off + copy;
1073 }
1074
 /* On the first chunk of this call, clear PSH on the skb being extended. */
1075 if (!copied)
1076 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1077
1078 tp->write_seq += copy;
1079 TCP_SKB_CB(skb)->end_seq += copy;
1080 skb_shinfo(skb)->gso_segs = 0;
1081
1082 from += copy;
1083 copied += copy;
 /* Done once the last iovec entry has been consumed. */
1084 if ((seglen -= copy) == 0 && iovlen == 0)
1085 goto out;
1086
 /* Keep filling this skb until it reaches max (always for MSG_OOB). */
1087 if (skb->len < max || (flags & MSG_OOB))
1088 continue;
1089
 /* skb is full: push everything if a push is due, or send this skb
  * right away when it is the only unsent one.
  */
1090 if (forced_push(tp)) {
1091 tcp_mark_push(tp, skb);
1092 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
1093 } else if (skb == tcp_send_head(sk))
1094 tcp_push_one(sk, mss_now);
1095 continue;
1096
1097wait_for_sndbuf:
1098 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1099wait_for_memory:
 /* Flush what has been queued so far before sleeping. */
1100 if (copied)
1101 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1102
1103 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1104 goto do_error;
1105
 /* MSS and size goal are recomputed after the wait. */
1106 mss_now = tcp_send_mss(sk, &size_goal, flags);
1107 }
1108 }
1109
1110out:
1111 if (copied)
1112 tcp_push(sk, flags, mss_now, tp->nonagle);
1113 release_sock(sk);
1114 return copied;
1115
1116do_fault:
 /* The copy into the linear area failed; if the skb is still empty,
  * take it off the write queue again and free it.
  */
1117 if (!skb->len) {
1118 tcp_unlink_write_queue(skb, sk);
1119
1120
1121
1122 tcp_check_send_head(sk, skb);
1123 sk_wmem_free_skb(sk, skb);
1124 }
1125
1126do_error:
 /* A partial send is reported as success with the byte count. */
1127 if (copied)
1128 goto out;
1129out_err:
1130 err = sk_stream_error(sk, flags, err);
1131 release_sock(sk);
1132 return err;
1133}
1134EXPORT_SYMBOL(tcp_sendmsg);
1135
1136
1137
1138
1139
1140
1141static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1142{
1143 struct tcp_sock *tp = tcp_sk(sk);
1144
1145
1146 if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
1147 tp->urg_data == TCP_URG_READ)
1148 return -EINVAL;
1149
1150 if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
1151 return -ENOTCONN;
1152
1153 if (tp->urg_data & TCP_URG_VALID) {
1154 int err = 0;
1155 char c = tp->urg_data;
1156
1157 if (!(flags & MSG_PEEK))
1158 tp->urg_data = TCP_URG_READ;
1159
1160
1161 msg->msg_flags |= MSG_OOB;
1162
1163 if (len > 0) {
1164 if (!(flags & MSG_TRUNC))
1165 err = memcpy_toiovec(msg->msg_iov, &c, 1);
1166 len = 1;
1167 } else
1168 msg->msg_flags |= MSG_TRUNC;
1169
1170 return err ? -EFAULT : len;
1171 }
1172
1173 if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
1174 return 0;
1175
1176
1177
1178
1179
1180
1181
1182 return -EAGAIN;
1183}
1184
1185
1186
1187
1188
1189
1190
1191void tcp_cleanup_rbuf(struct sock *sk, int copied)
1192{
1193 struct tcp_sock *tp = tcp_sk(sk);
1194 int time_to_ack = 0;
1195
1196 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1197
1198 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
1199 "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
1200 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
1201
1202 if (inet_csk_ack_scheduled(sk)) {
1203 const struct inet_connection_sock *icsk = inet_csk(sk);
1204
1205
1206 if (icsk->icsk_ack.blocked ||
1207
1208 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
1209
1210
1211
1212
1213
1214
1215 (copied > 0 &&
1216 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
1217 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
1218 !icsk->icsk_ack.pingpong)) &&
1219 !atomic_read(&sk->sk_rmem_alloc)))
1220 time_to_ack = 1;
1221 }
1222
1223
1224
1225
1226
1227
1228
1229 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1230 __u32 rcv_window_now = tcp_receive_window(tp);
1231
1232
1233 if (2*rcv_window_now <= tp->window_clamp) {
1234 __u32 new_window = __tcp_select_window(sk);
1235
1236
1237
1238
1239
1240
1241 if (new_window && new_window >= 2 * rcv_window_now)
1242 time_to_ack = 1;
1243 }
1244 }
1245 if (time_to_ack)
1246 tcp_send_ack(sk);
1247}
1248
1249static void tcp_prequeue_process(struct sock *sk)
1250{
1251 struct sk_buff *skb;
1252 struct tcp_sock *tp = tcp_sk(sk);
1253
1254 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
1255
1256
1257
1258 local_bh_disable();
1259 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1260 sk_backlog_rcv(sk, skb);
1261 local_bh_enable();
1262
1263
1264 tp->ucopy.memory = 0;
1265}
1266
#ifdef CONFIG_NET_DMA
/*
 * Reap skbs whose asynchronous DMA copies have finished.
 *
 * Does nothing when no DMA channel is in use.  Otherwise pending copies
 * are issued; once the last issued cookie has completed the whole async
 * wait queue is purged.  Until then only the skbs at the head of the queue
 * whose own cookie has completed are freed.  With @wait set, this repeats
 * until the last cookie completes; without it a single pass is made.
 */
static void tcp_service_net_dma(struct sock *sk, bool wait)
{
	struct tcp_sock *tp = tcp_sk(sk);
	dma_cookie_t last_issued;
	dma_cookie_t done, used;
	struct sk_buff *skb;

	if (!tp->ucopy.dma_chan)
		return;

	last_issued = tp->ucopy.dma_cookie;
	dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);

	do {
		if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
					      last_issued, &done,
					      &used) == DMA_SUCCESS) {
			/* Everything issued so far is done. */
			__skb_queue_purge(&sk->sk_async_wait_queue);
			break;
		}

		/* Free the completed skbs at the head of the queue. */
		for (;;) {
			skb = skb_peek(&sk->sk_async_wait_queue);
			if (!skb)
				break;
			if (dma_async_is_complete(skb->dma_cookie, done,
						  used) != DMA_SUCCESS)
				break;

			__skb_dequeue(&sk->sk_async_wait_queue);
			kfree_skb(skb);
		}
	} while (wait);
}
#endif
1299
1300static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1301{
1302 struct sk_buff *skb;
1303 u32 offset;
1304
1305 skb_queue_walk(&sk->sk_receive_queue, skb) {
1306 offset = seq - TCP_SKB_CB(skb)->seq;
1307 if (tcp_hdr(skb)->syn)
1308 offset--;
1309 if (offset < skb->len || tcp_hdr(skb)->fin) {
1310 *off = offset;
1311 return skb;
1312 }
1313 }
1314 return NULL;
1315}
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1329 sk_read_actor_t recv_actor)
1330{
1331 struct sk_buff *skb;
1332 struct tcp_sock *tp = tcp_sk(sk);
1333 u32 seq = tp->copied_seq;
1334 u32 offset;
1335 int copied = 0;
1336
1337 if (sk->sk_state == TCP_LISTEN)
1338 return -ENOTCONN;
1339 while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
1340 if (offset < skb->len) {
1341 int used;
1342 size_t len;
1343
1344 len = skb->len - offset;
1345
1346 if (tp->urg_data) {
1347 u32 urg_offset = tp->urg_seq - seq;
1348 if (urg_offset < len)
1349 len = urg_offset;
1350 if (!len)
1351 break;
1352 }
1353 used = recv_actor(desc, skb, offset, len);
1354 if (used < 0) {
1355 if (!copied)
1356 copied = used;
1357 break;
1358 } else if (used <= len) {
1359 seq += used;
1360 copied += used;
1361 offset += used;
1362 }
1363
1364
1365
1366
1367
1368
1369 skb = tcp_recv_skb(sk, seq-1, &offset);
1370 if (!skb || (offset+1 != skb->len))
1371 break;
1372 }
1373 if (tcp_hdr(skb)->fin) {
1374 sk_eat_skb(sk, skb, 0);
1375 ++seq;
1376 break;
1377 }
1378 sk_eat_skb(sk, skb, 0);
1379 if (!desc->count)
1380 break;
1381 tp->copied_seq = seq;
1382 }
1383 tp->copied_seq = seq;
1384
1385 tcp_rcv_space_adjust(sk);
1386
1387
1388 if (copied > 0)
1389 tcp_cleanup_rbuf(sk, copied);
1390 return copied;
1391}
1392EXPORT_SYMBOL(tcp_read_sock);
1393
1394
1395
1396
1397
1398
1399
1400
1401
/*
 * recvmsg() implementation for TCP: copy up to @len bytes of received data
 * into msg->msg_iov.
 *
 * The socket lock is held for the call (and dropped while sleeping or
 * processing the backlog).  MSG_OOB requests are handed to tcp_recv_urg().
 * With MSG_PEEK a private copy of the read position is advanced instead of
 * tp->copied_seq and skbs are left on the queue.  With MSG_TRUNC the data
 * is consumed without being copied.
 *
 * Returns the number of bytes received, 0 when nothing was read and the
 * loop ended without an error (SOCK_DONE, receive shutdown, or a FIN was
 * consumed), or a negative error: -ENOTCONN, -EAGAIN, -EFAULT, the pending
 * socket error, or the signal-interruption errno.
 *
 * NOTE(review): @iocb and @addr_len are not referenced in the body.
 */
1402int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1403 size_t len, int nonblock, int flags, int *addr_len)
1404{
1405 struct tcp_sock *tp = tcp_sk(sk);
1406 int copied = 0;
1407 u32 peek_seq;
1408 u32 *seq;
1409 unsigned long used;
1410 int err;
1411 int target;
1412 long timeo;
1413 struct task_struct *user_recv = NULL;
1414 int copied_early = 0;
1415 struct sk_buff *skb;
1416 u32 urg_hole = 0;
1417
1418 lock_sock(sk);
1419
 /* A listening socket has no data stream. */
1420 err = -ENOTCONN;
1421 if (sk->sk_state == TCP_LISTEN)
1422 goto out;
1423
1424 timeo = sock_rcvtimeo(sk, nonblock);
1425
1426
 /* Out-of-band data is handled separately. */
1427 if (flags & MSG_OOB)
1428 goto recv_urg;
1429
 /* *seq is the read position: the real one, or a private copy for MSG_PEEK. */
1430 seq = &tp->copied_seq;
1431 if (flags & MSG_PEEK) {
1432 peek_seq = tp->copied_seq;
1433 seq = &peek_seq;
1434 }
1435
 /* Minimum number of bytes to collect before returning. */
1436 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1437
1438#ifdef CONFIG_NET_DMA
 /* Pin the user pages for DMA copies when less than target bytes are
  * queued, the request is larger than sysctl_tcp_dma_copybreak, this is
  * not a peek, low-latency mode is off and a DMA_MEMCPY channel exists.
  */
1439 tp->ucopy.dma_chan = NULL;
1440 preempt_disable();
1441 skb = skb_peek_tail(&sk->sk_receive_queue);
1442 {
1443 int available = 0;
1444
1445 if (skb)
1446 available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
1447 if ((available < target) &&
1448 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1449 !sysctl_tcp_low_latency &&
1450 dma_find_channel(DMA_MEMCPY)) {
1451 preempt_enable_no_resched();
1452 tp->ucopy.pinned_list =
1453 dma_pin_iovec_pages(msg->msg_iov, len);
1454 } else {
1455 preempt_enable_no_resched();
1456 }
1457 }
1458#endif
1459
1460 do {
1461 u32 offset;
1462
1463
 /* Read position is at the urgent mark: stop if something was
  * already read, or if a signal is pending.
  */
1464 if (tp->urg_data && tp->urg_seq == *seq) {
1465 if (copied)
1466 break;
1467 if (signal_pending(current)) {
1468 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
1469 break;
1470 }
1471 }
1472
1473
1474
 /* Look for the skb in the receive queue that holds *seq. */
1475 skb_queue_walk(&sk->sk_receive_queue, skb) {
1476
1477
1478
 /* The read position must not lie before the skb's start. */
1479 if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
1480 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
1481 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
1482 flags))
1483 break;
1484
1485 offset = *seq - TCP_SKB_CB(skb)->seq;
 /* a SYN occupies one sequence number but carries no data */
1486 if (tcp_hdr(skb)->syn)
1487 offset--;
1488 if (offset < skb->len)
1489 goto found_ok_skb;
1490 if (tcp_hdr(skb)->fin)
1491 goto found_fin_ok;
 /* A fully read skb left on the queue is only expected when peeking. */
1492 WARN(!(flags & MSG_PEEK),
1493 "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
1494 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
1495 }
1496
1497
1498
 /* Nothing usable in the receive queue: done if enough was read and
  * the backlog is empty.
  */
1499 if (copied >= target && !sk->sk_backlog.tail)
1500 break;
1501
1502 if (copied) {
 /* Some data read: return it rather than wait on any of these. */
1503 if (sk->sk_err ||
1504 sk->sk_state == TCP_CLOSE ||
1505 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1506 !timeo ||
1507 signal_pending(current))
1508 break;
1509 } else {
 /* Nothing read yet: work out whether to return 0, an error, or wait. */
1510 if (sock_flag(sk, SOCK_DONE))
1511 break;
1512
1513 if (sk->sk_err) {
1514 copied = sock_error(sk);
1515 break;
1516 }
1517
1518 if (sk->sk_shutdown & RCV_SHUTDOWN)
1519 break;
1520
1521 if (sk->sk_state == TCP_CLOSE) {
1522 if (!sock_flag(sk, SOCK_DONE)) {
1523
1524
1525
 /* closed, and SOCK_DONE is not set */
1526 copied = -ENOTCONN;
1527 break;
1528 }
1529 break;
1530 }
1531
1532 if (!timeo) {
1533 copied = -EAGAIN;
1534 break;
1535 }
1536
1537 if (signal_pending(current)) {
1538 copied = sock_intr_errno(timeo);
1539 break;
1540 }
1541 }
1542
 /* Possibly send an ACK / window update for what was consumed so far. */
1543 tcp_cleanup_rbuf(sk, copied);
1544
1545 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
1546
 /* Register this task as the direct-copy reader (tp->ucopy), unless
  * truncating or peeking.
  */
1547 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1548 user_recv = current;
1549 tp->ucopy.task = user_recv;
1550 tp->ucopy.iov = msg->msg_iov;
1551 }
1552
1553 tp->ucopy.len = len;
1554
1555 WARN_ON(tp->copied_seq != tp->rcv_nxt &&
1556 !(flags & (MSG_PEEK | MSG_TRUNC)));
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
 /* Packets are already waiting on the prequeue: process them now. */
1584 if (!skb_queue_empty(&tp->ucopy.prequeue))
1585 goto do_prequeue;
1586
1587
1588 }
1589
1590#ifdef CONFIG_NET_DMA
1591 if (tp->ucopy.dma_chan)
1592 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1593#endif
1594 if (copied >= target) {
1595
 /* Enough data already: release and retake the lock instead of sleeping. */
1596 release_sock(sk);
1597 lock_sock(sk);
1598 } else
1599 sk_wait_data(sk, &timeo);
1600
1601#ifdef CONFIG_NET_DMA
 /* single, non-waiting pass over completed DMA copies */
1602 tcp_service_net_dma(sk, false);
1603 tp->ucopy.wakeup = 0;
1604#endif
1605
1606 if (user_recv) {
1607 int chunk;
1608
1609
1610
 /* tp->ucopy.len shrank while the lock was released: that many
  * bytes were copied directly into the user buffer.
  */
1611 if ((chunk = len - tp->ucopy.len) != 0) {
1612 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
1613 len -= chunk;
1614 copied += chunk;
1615 }
1616
1617 if (tp->rcv_nxt == tp->copied_seq &&
1618 !skb_queue_empty(&tp->ucopy.prequeue)) {
1619do_prequeue:
1620 tcp_prequeue_process(sk);
1621
 /* account for bytes copied directly while draining the prequeue */
1622 if ((chunk = len - tp->ucopy.len) != 0) {
1623 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1624 len -= chunk;
1625 copied += chunk;
1626 }
1627 }
1628 }
 /* When peeking, tp->copied_seq must not have moved underneath us. */
1629 if ((flags & MSG_PEEK) &&
1630 (peek_seq - copied - urg_hole != tp->copied_seq)) {
1631 if (net_ratelimit())
1632 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1633 current->comm, task_pid_nr(current));
1634 peek_seq = tp->copied_seq;
1635 }
1636 continue;
1637
1638 found_ok_skb:
1639
 /* Bytes usable from this skb, limited by the request length. */
1640 used = skb->len - offset;
1641 if (len < used)
1642 used = len;
1643
1644
 /* Do not read across the urgent mark; an out-of-line urgent byte
  * at the read position is skipped (counted in urg_hole).
  */
1645 if (tp->urg_data) {
1646 u32 urg_offset = tp->urg_seq - *seq;
1647 if (urg_offset < used) {
1648 if (!urg_offset) {
1649 if (!sock_flag(sk, SOCK_URGINLINE)) {
1650 ++*seq;
1651 urg_hole++;
1652 offset++;
1653 used--;
1654 if (!used)
1655 goto skip_copy;
1656 }
1657 } else
1658 used = urg_offset;
1659 }
1660 }
1661
 /* With MSG_TRUNC the data is consumed without being copied. */
1662 if (!(flags & MSG_TRUNC)) {
1663#ifdef CONFIG_NET_DMA
1664 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1665 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1666
1667 if (tp->ucopy.dma_chan) {
1668 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
1669 tp->ucopy.dma_chan, skb, offset,
1670 msg->msg_iov, used,
1671 tp->ucopy.pinned_list);
1672
1673 if (tp->ucopy.dma_cookie < 0) {
1674
1675 printk(KERN_ALERT "dma_cookie < 0\n");
1676
1677
 /* DMA copy failed: report -EFAULT unless data was already read */
1678 if (!copied)
1679 copied = -EFAULT;
1680 break;
1681 }
1682
1683 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1684
 /* whole skb handed to DMA: flag it for sk_eat_skb() */
1685 if ((offset + used) == skb->len)
1686 copied_early = 1;
1687
1688 } else
1689#endif
1690 {
1691 err = skb_copy_datagram_iovec(skb, offset,
1692 msg->msg_iov, used);
1693 if (err) {
1694
 /* copy failed: report -EFAULT unless data was already read */
1695 if (!copied)
1696 copied = -EFAULT;
1697 break;
1698 }
1699 }
1700 }
1701
1702 *seq += used;
1703 copied += used;
1704 len -= used;
1705
1706 tcp_rcv_space_adjust(sk);
1707
1708skip_copy:
 /* Once the real read position has passed the urgent mark, clear the
  * urgent state.
  */
1709 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
1710 tp->urg_data = 0;
1711 tcp_fast_path_check(sk);
1712 }
 /* skb not fully consumed yet: keep it queued. */
1713 if (used + offset < skb->len)
1714 continue;
1715
1716 if (tcp_hdr(skb)->fin)
1717 goto found_fin_ok;
 /* Fully consumed skb: remove it from the queue unless peeking. */
1718 if (!(flags & MSG_PEEK)) {
1719 sk_eat_skb(sk, skb, copied_early);
1720 copied_early = 0;
1721 }
1722 continue;
1723
1724 found_fin_ok:
1725
 /* The FIN consumes one sequence number and ends the read loop. */
1726 ++*seq;
1727 if (!(flags & MSG_PEEK)) {
1728 sk_eat_skb(sk, skb, copied_early);
1729 copied_early = 0;
1730 }
1731 break;
1732 } while (len > 0);
1733
1734 if (user_recv) {
 /* Drain what is left on the prequeue, then unregister as reader. */
1735 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
1736 int chunk;
1737
 /* allow direct copies only if this call is returning data */
1738 tp->ucopy.len = copied > 0 ? len : 0;
1739
1740 tcp_prequeue_process(sk);
1741
1742 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1743 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1744 len -= chunk;
1745 copied += chunk;
1746 }
1747 }
1748
1749 tp->ucopy.task = NULL;
1750 tp->ucopy.len = 0;
1751 }
1752
1753#ifdef CONFIG_NET_DMA
 /* Wait for all outstanding DMA copies, then unpin the user pages. */
1754 tcp_service_net_dma(sk, true);
1755 tp->ucopy.dma_chan = NULL;
1756
1757 if (tp->ucopy.pinned_list) {
1758 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
1759 tp->ucopy.pinned_list = NULL;
1760 }
1761#endif
1762
1763
1764
1765
1766
1767
 /* Final ACK / window-update check for everything consumed. */
1768 tcp_cleanup_rbuf(sk, copied);
1769
1770 release_sock(sk);
1771 return copied;
1772
1773out:
1774 release_sock(sk);
1775 return err;
1776
1777recv_urg:
1778 err = tcp_recv_urg(sk, msg, len, flags);
1779 goto out;
1780}
1781EXPORT_SYMBOL(tcp_recvmsg);
1782
1783void tcp_set_state(struct sock *sk, int state)
1784{
1785 int oldstate = sk->sk_state;
1786
1787 switch (state) {
1788 case TCP_ESTABLISHED:
1789 if (oldstate != TCP_ESTABLISHED)
1790 TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
1791 break;
1792
1793 case TCP_CLOSE:
1794 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1795 TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
1796
1797 sk->sk_prot->unhash(sk);
1798 if (inet_csk(sk)->icsk_bind_hash &&
1799 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1800 inet_put_port(sk);
1801
1802 default:
1803 if (oldstate == TCP_ESTABLISHED)
1804 TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
1805 }
1806
1807
1808
1809
1810 sk->sk_state = state;
1811
1812#ifdef STATE_TRACE
1813 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
1814#endif
1815}
1816EXPORT_SYMBOL_GPL(tcp_set_state);
1817
1818
1819
1820
1821
1822
1823
1824
1825static const unsigned char new_state[16] = {
1826
1827 TCP_CLOSE,
1828 TCP_FIN_WAIT1 | TCP_ACTION_FIN,
1829 TCP_CLOSE,
1830 TCP_FIN_WAIT1 | TCP_ACTION_FIN,
1831 TCP_FIN_WAIT1,
1832 TCP_FIN_WAIT2,
1833 TCP_CLOSE,
1834 TCP_CLOSE,
1835 TCP_LAST_ACK | TCP_ACTION_FIN,
1836 TCP_LAST_ACK,
1837 TCP_CLOSE,
1838 TCP_CLOSING,
1839};
1840
1841static int tcp_close_state(struct sock *sk)
1842{
1843 int next = (int)new_state[sk->sk_state];
1844 int ns = next & TCP_STATE_MASK;
1845
1846 tcp_set_state(sk, ns);
1847
1848 return next & TCP_ACTION_FIN;
1849}
1850
1851
1852
1853
1854
1855
1856void tcp_shutdown(struct sock *sk, int how)
1857{
1858
1859
1860
1861
1862 if (!(how & SEND_SHUTDOWN))
1863 return;
1864
1865
1866 if ((1 << sk->sk_state) &
1867 (TCPF_ESTABLISHED | TCPF_SYN_SENT |
1868 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
1869
1870 if (tcp_close_state(sk))
1871 tcp_send_fin(sk);
1872 }
1873}
1874EXPORT_SYMBOL(tcp_shutdown);
1875
/*
 * tcp_close - close a TCP socket.
 * @sk:      socket being closed
 * @timeout: handed unchanged to sk_stream_wait_close()
 *
 * Marks both directions shut down, discards unread receive data, picks
 * between reset, disconnect and an orderly FIN, then orphans the socket
 * and finishes under the bottom-half socket lock.  The socket is
 * destroyed here only if it ends up in TCP_CLOSE.
 */
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Listening socket: stop listening, then skip straight
		 * to the orphan/destroy phase.
		 */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* Drop everything still sitting in the receive queue, counting
	 * the payload bytes thrown away.  The FIN flag occupies one
	 * sequence number and is subtracted so that it is not counted
	 * as data.
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  tcp_hdr(skb)->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_mem_reclaim(sk);

	/* Already closed: nothing left to negotiate with the peer. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* Three ways out of an open connection:
	 *  - unread data was discarded: close immediately and send a
	 *    reset (counted as TCPABORTONCLOSE);
	 *  - SOCK_LINGER with a zero linger time: disconnect
	 *    (counted as TCPABORTONDATA);
	 *  - otherwise: run the close state transition and send a FIN
	 *    if it requests one.
	 */
	if (data_was_unread) {
		/* Unread data was tossed, send an active reset. */
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Zero linger time: abort via the protocol's disconnect. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* The state table asked for a FIN to be sent. */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Remember the state seen while still holding the user lock,
	 * take a reference so the socket survives until sock_put()
	 * below, and detach it from its owner.
	 */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/* Give up the process-context lock ... */
	release_sock(sk);

	/* ... and continue under the bottom-half socket lock with BHs
	 * disabled.  Nobody may own the socket as a user at this point.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* The socket reached TCP_CLOSE between the snapshot above and
	 * now.  NOTE(review): presumably it was already torn down while
	 * the lock was dropped, so it is not destroyed again here.
	 */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/* FIN_WAIT2 handling: a negative linger2 means abort at once
	 * with a reset.  Otherwise compute the FIN timeout; if it is
	 * longer than TCP_TIMEWAIT_LEN, arm the keepalive timer for the
	 * excess, else hand the socket over to time-wait processing
	 * right away.
	 */
	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	/* Still not closed: reclaim memory and, if the orphan limit is
	 * exceeded, give up on an orderly close and reset.
	 */
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);
		if (tcp_too_many_orphans(sk, 0)) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many of orphaned "
				       "sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(sk);
	/* Otherwise the socket stays around, orphaned. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	/* Drop the reference taken by sock_hold() above. */
	sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
2036
2037
2038
2039static inline int tcp_need_reset(int state)
2040{
2041 return (1 << state) &
2042 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2043 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2044}
2045
/*
 * tcp_disconnect - abort whatever connection @sk has and reset its
 * per-connection state so the socket ends up in TCP_CLOSE.
 * @sk:    socket to disconnect
 * @flags: not referenced by this function
 *
 * Returns err, which is initialised to 0 and never changed here.
 */
int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;
	int old_state = sk->sk_state;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* Decide, from the state the socket was in, how to abort:
	 *  - LISTEN: stop listening;
	 *  - a state needing a reset, or CLOSING/LAST_ACK with data
	 *    not yet sent (snd_nxt != write_seq): send a reset and
	 *    report ECONNRESET;
	 *  - SYN_SENT: only report ECONNRESET.
	 */
	if (old_state == TCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* gfp_any() is used for the reset allocation.
		 * NOTE(review): presumably because this can run in
		 * either process or softirq context; confirm.
		 */
		tcp_send_active_reset(sk, gfp_any());
		sk->sk_err = ECONNRESET;
	} else if (old_state == TCP_SYN_SENT)
		sk->sk_err = ECONNRESET;

	/* Stop timers and throw away all queued data. */
	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	tcp_write_queue_purge(sk);
	__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	inet->inet_dport = 0;

	/* Keep the source address only if the user bound it explicitly. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt = 0;
	/* Advance write_seq by max_window + 2, skipping the value 0. */
	if ((tp->write_seq += tp->max_window + 2) == 0)
		tp->write_seq = 1;
	/* Reset congestion control and retransmission bookkeeping. */
	icsk->icsk_backoff = 0;
	tp->snd_cwnd = 2;
	icsk->icsk_probes_out = 0;
	tp->packets_out = 0;
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_cnt = 0;
	tp->bytes_acked = 0;
	tp->window_clamp = 0;
	tcp_set_ca_state(sk, TCP_CA_Open);
	tcp_clear_retrans(tp);
	inet_csk_delack_init(sk);
	tcp_init_send_head(sk);
	/* Forget all options negotiated on the old connection. */
	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
	__sk_dst_reset(sk);

	/* A socket that still has a local port must still be bound. */
	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
EXPORT_SYMBOL(tcp_disconnect);
2110
2111
2112
2113
2114static int do_tcp_setsockopt(struct sock *sk, int level,
2115 int optname, char __user *optval, unsigned int optlen)
2116{
2117 struct tcp_sock *tp = tcp_sk(sk);
2118 struct inet_connection_sock *icsk = inet_csk(sk);
2119 int val;
2120 int err = 0;
2121
2122
2123 switch (optname) {
2124 case TCP_CONGESTION: {
2125 char name[TCP_CA_NAME_MAX];
2126
2127 if (optlen < 1)
2128 return -EINVAL;
2129
2130 val = strncpy_from_user(name, optval,
2131 min_t(long, TCP_CA_NAME_MAX-1, optlen));
2132 if (val < 0)
2133 return -EFAULT;
2134 name[val] = 0;
2135
2136 lock_sock(sk);
2137 err = tcp_set_congestion_control(sk, name);
2138 release_sock(sk);
2139 return err;
2140 }
2141 case TCP_COOKIE_TRANSACTIONS: {
2142 struct tcp_cookie_transactions ctd;
2143 struct tcp_cookie_values *cvp = NULL;
2144
2145 if (sizeof(ctd) > optlen)
2146 return -EINVAL;
2147 if (copy_from_user(&ctd, optval, sizeof(ctd)))
2148 return -EFAULT;
2149
2150 if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
2151 ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
2152 return -EINVAL;
2153
2154 if (ctd.tcpct_cookie_desired == 0) {
2155
2156 } else if ((0x1 & ctd.tcpct_cookie_desired) ||
2157 ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
2158 ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
2159 return -EINVAL;
2160 }
2161
2162 if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
2163
2164 lock_sock(sk);
2165 if (tp->cookie_values != NULL) {
2166 kref_put(&tp->cookie_values->kref,
2167 tcp_cookie_values_release);
2168 tp->cookie_values = NULL;
2169 }
2170 tp->rx_opt.cookie_in_always = 0;
2171 tp->rx_opt.cookie_out_never = 1;
2172 release_sock(sk);
2173 return err;
2174 }
2175
2176
2177
2178 if (ctd.tcpct_used > 0 ||
2179 (tp->cookie_values == NULL &&
2180 (sysctl_tcp_cookie_size > 0 ||
2181 ctd.tcpct_cookie_desired > 0 ||
2182 ctd.tcpct_s_data_desired > 0))) {
2183 cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
2184 GFP_KERNEL);
2185 if (cvp == NULL)
2186 return -ENOMEM;
2187
2188 kref_init(&cvp->kref);
2189 }
2190 lock_sock(sk);
2191 tp->rx_opt.cookie_in_always =
2192 (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
2193 tp->rx_opt.cookie_out_never = 0;
2194
2195 if (tp->cookie_values != NULL) {
2196 if (cvp != NULL) {
2197
2198
2199
2200
2201 kref_put(&tp->cookie_values->kref,
2202 tcp_cookie_values_release);
2203 } else {
2204 cvp = tp->cookie_values;
2205 }
2206 }
2207
2208 if (cvp != NULL) {
2209 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2210
2211 if (ctd.tcpct_used > 0) {
2212 memcpy(cvp->s_data_payload, ctd.tcpct_value,
2213 ctd.tcpct_used);
2214 cvp->s_data_desired = ctd.tcpct_used;
2215 cvp->s_data_constant = 1;
2216 } else {
2217
2218 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2219 cvp->s_data_constant = 0;
2220 }
2221
2222 tp->cookie_values = cvp;
2223 }
2224 release_sock(sk);
2225 return err;
2226 }
2227 default:
2228
2229 break;
2230 }
2231
2232 if (optlen < sizeof(int))
2233 return -EINVAL;
2234
2235 if (get_user(val, (int __user *)optval))
2236 return -EFAULT;
2237
2238 lock_sock(sk);
2239
2240 switch (optname) {
2241 case TCP_MAXSEG:
2242
2243
2244
2245 if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
2246 err = -EINVAL;
2247 break;
2248 }
2249 tp->rx_opt.user_mss = val;
2250 break;
2251
2252 case TCP_NODELAY:
2253 if (val) {
2254
2255
2256
2257
2258
2259
2260
2261
2262 tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
2263 tcp_push_pending_frames(sk);
2264 } else {
2265 tp->nonagle &= ~TCP_NAGLE_OFF;
2266 }
2267 break;
2268
2269 case TCP_THIN_LINEAR_TIMEOUTS:
2270 if (val < 0 || val > 1)
2271 err = -EINVAL;
2272 else
2273 tp->thin_lto = val;
2274 break;
2275
2276 case TCP_THIN_DUPACK:
2277 if (val < 0 || val > 1)
2278 err = -EINVAL;
2279 else
2280 tp->thin_dupack = val;
2281 break;
2282
2283 case TCP_CORK:
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295 if (val) {
2296 tp->nonagle |= TCP_NAGLE_CORK;
2297 } else {
2298 tp->nonagle &= ~TCP_NAGLE_CORK;
2299 if (tp->nonagle&TCP_NAGLE_OFF)
2300 tp->nonagle |= TCP_NAGLE_PUSH;
2301 tcp_push_pending_frames(sk);
2302 }
2303 break;
2304
2305 case TCP_KEEPIDLE:
2306 if (val < 1 || val > MAX_TCP_KEEPIDLE)
2307 err = -EINVAL;
2308 else {
2309 tp->keepalive_time = val * HZ;
2310 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2311 !((1 << sk->sk_state) &
2312 (TCPF_CLOSE | TCPF_LISTEN))) {
2313 u32 elapsed = keepalive_time_elapsed(tp);
2314 if (tp->keepalive_time > elapsed)
2315 elapsed = tp->keepalive_time - elapsed;
2316 else
2317 elapsed = 0;
2318 inet_csk_reset_keepalive_timer(sk, elapsed);
2319 }
2320 }
2321 break;
2322 case TCP_KEEPINTVL:
2323 if (val < 1 || val > MAX_TCP_KEEPINTVL)
2324 err = -EINVAL;
2325 else
2326 tp->keepalive_intvl = val * HZ;
2327 break;
2328 case TCP_KEEPCNT:
2329 if (val < 1 || val > MAX_TCP_KEEPCNT)
2330 err = -EINVAL;
2331 else
2332 tp->keepalive_probes = val;
2333 break;
2334 case TCP_SYNCNT:
2335 if (val < 1 || val > MAX_TCP_SYNCNT)
2336 err = -EINVAL;
2337 else
2338 icsk->icsk_syn_retries = val;
2339 break;
2340
2341 case TCP_LINGER2:
2342 if (val < 0)
2343 tp->linger2 = -1;
2344 else if (val > sysctl_tcp_fin_timeout / HZ)
2345 tp->linger2 = 0;
2346 else
2347 tp->linger2 = val * HZ;
2348 break;
2349
2350 case TCP_DEFER_ACCEPT:
2351
2352 icsk->icsk_accept_queue.rskq_defer_accept =
2353 secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
2354 TCP_RTO_MAX / HZ);
2355 break;
2356
2357 case TCP_WINDOW_CLAMP:
2358 if (!val) {
2359 if (sk->sk_state != TCP_CLOSE) {
2360 err = -EINVAL;
2361 break;
2362 }
2363 tp->window_clamp = 0;
2364 } else
2365 tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
2366 SOCK_MIN_RCVBUF / 2 : val;
2367 break;
2368
2369 case TCP_QUICKACK:
2370 if (!val) {
2371 icsk->icsk_ack.pingpong = 1;
2372 } else {
2373 icsk->icsk_ack.pingpong = 0;
2374 if ((1 << sk->sk_state) &
2375 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
2376 inet_csk_ack_scheduled(sk)) {
2377 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
2378 tcp_cleanup_rbuf(sk, 1);
2379 if (!(val & 1))
2380 icsk->icsk_ack.pingpong = 1;
2381 }
2382 }
2383 break;
2384
2385#ifdef CONFIG_TCP_MD5SIG
2386 case TCP_MD5SIG:
2387
2388 err = tp->af_specific->md5_parse(sk, optval, optlen);
2389 break;
2390#endif
2391 case TCP_USER_TIMEOUT:
2392
2393
2394
2395 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2396 break;
2397 default:
2398 err = -ENOPROTOOPT;
2399 break;
2400 }
2401
2402 release_sock(sk);
2403 return err;
2404}
2405
2406int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2407 unsigned int optlen)
2408{
2409 const struct inet_connection_sock *icsk = inet_csk(sk);
2410
2411 if (level != SOL_TCP)
2412 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
2413 optval, optlen);
2414 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2415}
2416EXPORT_SYMBOL(tcp_setsockopt);
2417
2418#ifdef CONFIG_COMPAT
2419int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2420 char __user *optval, unsigned int optlen)
2421{
2422 if (level != SOL_TCP)
2423 return inet_csk_compat_setsockopt(sk, level, optname,
2424 optval, optlen);
2425 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2426}
2427EXPORT_SYMBOL(compat_tcp_setsockopt);
2428#endif
2429
2430
2431void tcp_get_info(const struct sock *sk, struct tcp_info *info)
2432{
2433 const struct tcp_sock *tp = tcp_sk(sk);
2434 const struct inet_connection_sock *icsk = inet_csk(sk);
2435 u32 now = tcp_time_stamp;
2436
2437 memset(info, 0, sizeof(*info));
2438
2439 info->tcpi_state = sk->sk_state;
2440 info->tcpi_ca_state = icsk->icsk_ca_state;
2441 info->tcpi_retransmits = icsk->icsk_retransmits;
2442 info->tcpi_probes = icsk->icsk_probes_out;
2443 info->tcpi_backoff = icsk->icsk_backoff;
2444
2445 if (tp->rx_opt.tstamp_ok)
2446 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
2447 if (tcp_is_sack(tp))
2448 info->tcpi_options |= TCPI_OPT_SACK;
2449 if (tp->rx_opt.wscale_ok) {
2450 info->tcpi_options |= TCPI_OPT_WSCALE;
2451 info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
2452 info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
2453 }
2454
2455 if (tp->ecn_flags & TCP_ECN_OK)
2456 info->tcpi_options |= TCPI_OPT_ECN;
2457 if (tp->ecn_flags & TCP_ECN_SEEN)
2458 info->tcpi_options |= TCPI_OPT_ECN_SEEN;
2459
2460 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
2461 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
2462 info->tcpi_snd_mss = tp->mss_cache;
2463 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2464
2465 if (sk->sk_state == TCP_LISTEN) {
2466 info->tcpi_unacked = sk->sk_ack_backlog;
2467 info->tcpi_sacked = sk->sk_max_ack_backlog;
2468 } else {
2469 info->tcpi_unacked = tp->packets_out;
2470 info->tcpi_sacked = tp->sacked_out;
2471 }
2472 info->tcpi_lost = tp->lost_out;
2473 info->tcpi_retrans = tp->retrans_out;
2474 info->tcpi_fackets = tp->fackets_out;
2475
2476 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
2477 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
2478 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
2479
2480 info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
2481 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
2482 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
2483 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
2484 info->tcpi_snd_ssthresh = tp->snd_ssthresh;
2485 info->tcpi_snd_cwnd = tp->snd_cwnd;
2486 info->tcpi_advmss = tp->advmss;
2487 info->tcpi_reordering = tp->reordering;
2488
2489 info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
2490 info->tcpi_rcv_space = tp->rcvq_space.space;
2491
2492 info->tcpi_total_retrans = tp->total_retrans;
2493}
2494EXPORT_SYMBOL_GPL(tcp_get_info);
2495
2496static int do_tcp_getsockopt(struct sock *sk, int level,
2497 int optname, char __user *optval, int __user *optlen)
2498{
2499 struct inet_connection_sock *icsk = inet_csk(sk);
2500 struct tcp_sock *tp = tcp_sk(sk);
2501 int val, len;
2502
2503 if (get_user(len, optlen))
2504 return -EFAULT;
2505
2506 len = min_t(unsigned int, len, sizeof(int));
2507
2508 if (len < 0)
2509 return -EINVAL;
2510
2511 switch (optname) {
2512 case TCP_MAXSEG:
2513 val = tp->mss_cache;
2514 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2515 val = tp->rx_opt.user_mss;
2516 break;
2517 case TCP_NODELAY:
2518 val = !!(tp->nonagle&TCP_NAGLE_OFF);
2519 break;
2520 case TCP_CORK:
2521 val = !!(tp->nonagle&TCP_NAGLE_CORK);
2522 break;
2523 case TCP_KEEPIDLE:
2524 val = keepalive_time_when(tp) / HZ;
2525 break;
2526 case TCP_KEEPINTVL:
2527 val = keepalive_intvl_when(tp) / HZ;
2528 break;
2529 case TCP_KEEPCNT:
2530 val = keepalive_probes(tp);
2531 break;
2532 case TCP_SYNCNT:
2533 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
2534 break;
2535 case TCP_LINGER2:
2536 val = tp->linger2;
2537 if (val >= 0)
2538 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2539 break;
2540 case TCP_DEFER_ACCEPT:
2541 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
2542 TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
2543 break;
2544 case TCP_WINDOW_CLAMP:
2545 val = tp->window_clamp;
2546 break;
2547 case TCP_INFO: {
2548 struct tcp_info info;
2549
2550 if (get_user(len, optlen))
2551 return -EFAULT;
2552
2553 tcp_get_info(sk, &info);
2554
2555 len = min_t(unsigned int, len, sizeof(info));
2556 if (put_user(len, optlen))
2557 return -EFAULT;
2558 if (copy_to_user(optval, &info, len))
2559 return -EFAULT;
2560 return 0;
2561 }
2562 case TCP_QUICKACK:
2563 val = !icsk->icsk_ack.pingpong;
2564 break;
2565
2566 case TCP_CONGESTION:
2567 if (get_user(len, optlen))
2568 return -EFAULT;
2569 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
2570 if (put_user(len, optlen))
2571 return -EFAULT;
2572 if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
2573 return -EFAULT;
2574 return 0;
2575
2576 case TCP_COOKIE_TRANSACTIONS: {
2577 struct tcp_cookie_transactions ctd;
2578 struct tcp_cookie_values *cvp = tp->cookie_values;
2579
2580 if (get_user(len, optlen))
2581 return -EFAULT;
2582 if (len < sizeof(ctd))
2583 return -EINVAL;
2584
2585 memset(&ctd, 0, sizeof(ctd));
2586 ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
2587 TCP_COOKIE_IN_ALWAYS : 0)
2588 | (tp->rx_opt.cookie_out_never ?
2589 TCP_COOKIE_OUT_NEVER : 0);
2590
2591 if (cvp != NULL) {
2592 ctd.tcpct_flags |= (cvp->s_data_in ?
2593 TCP_S_DATA_IN : 0)
2594 | (cvp->s_data_out ?
2595 TCP_S_DATA_OUT : 0);
2596
2597 ctd.tcpct_cookie_desired = cvp->cookie_desired;
2598 ctd.tcpct_s_data_desired = cvp->s_data_desired;
2599
2600 memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
2601 cvp->cookie_pair_size);
2602 ctd.tcpct_used = cvp->cookie_pair_size;
2603 }
2604
2605 if (put_user(sizeof(ctd), optlen))
2606 return -EFAULT;
2607 if (copy_to_user(optval, &ctd, sizeof(ctd)))
2608 return -EFAULT;
2609 return 0;
2610 }
2611 case TCP_THIN_LINEAR_TIMEOUTS:
2612 val = tp->thin_lto;
2613 break;
2614 case TCP_THIN_DUPACK:
2615 val = tp->thin_dupack;
2616 break;
2617
2618 case TCP_USER_TIMEOUT:
2619 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2620 break;
2621 default:
2622 return -ENOPROTOOPT;
2623 }
2624
2625 if (put_user(len, optlen))
2626 return -EFAULT;
2627 if (copy_to_user(optval, &val, len))
2628 return -EFAULT;
2629 return 0;
2630}
2631
2632int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2633 int __user *optlen)
2634{
2635 struct inet_connection_sock *icsk = inet_csk(sk);
2636
2637 if (level != SOL_TCP)
2638 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
2639 optval, optlen);
2640 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2641}
2642EXPORT_SYMBOL(tcp_getsockopt);
2643
2644#ifdef CONFIG_COMPAT
2645int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2646 char __user *optval, int __user *optlen)
2647{
2648 if (level != SOL_TCP)
2649 return inet_csk_compat_getsockopt(sk, level, optname,
2650 optval, optlen);
2651 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2652}
2653EXPORT_SYMBOL(compat_tcp_getsockopt);
2654#endif
2655
/*
 * tcp_tso_segment - split an oversized TCP skb into MSS-sized segments.
 * @skb:      packet to segment
 * @features: device feature flags passed on to skb_gso_ok()/skb_segment()
 *
 * Returns the list of segments produced by skb_segment(), NULL when the
 * packet passes skb_gso_ok() (with NETIF_F_GSO_ROBUST added) and only
 * gso_segs is recomputed, or an ERR_PTR (initially -EINVAL) on failure.
 */
struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct tcphdr *th;
	unsigned thlen;
	unsigned int seq;
	__be32 delta;
	unsigned int oldlen;
	unsigned int mss;

	/* Make sure the fixed TCP header is in the linear area. */
	if (!pskb_may_pull(skb, sizeof(*th)))
		goto out;

	th = tcp_hdr(skb);
	thlen = th->doff * 4;
	if (thlen < sizeof(*th))
		goto out;

	/* ... and the options as well. */
	if (!pskb_may_pull(skb, thlen))
		goto out;

	/* Complement of the original length, used below to adjust the
	 * checksum of each segment for its new length.
	 */
	oldlen = (u16)~skb->len;
	__skb_pull(skb, thlen);

	mss = skb_shinfo(skb)->gso_size;
	/* Nothing to split if the payload already fits in one MSS. */
	if (unlikely(skb->len <= mss))
		goto out;

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* No software segmentation on this path: validate the
		 * GSO type bits and recompute gso_segs from the payload
		 * length, then return NULL.
		 */
		int type = skb_shinfo(skb)->gso_type;

		if (unlikely(type &
			     ~(SKB_GSO_TCPV4 |
			       SKB_GSO_DODGY |
			       SKB_GSO_TCP_ECN |
			       SKB_GSO_TCPV6 |
			       0) ||
			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
			goto out;

		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	segs = skb_segment(skb, features);
	if (IS_ERR(segs))
		goto out;

	/* Length delta for every full-sized segment. */
	delta = htonl(oldlen + (thlen + mss));

	skb = segs;
	th = tcp_hdr(skb);
	seq = ntohl(th->seq);

	/* Walk all segments except the last.  The loop body fixes up the
	 * current segment, then steps to the next one and stamps its
	 * sequence number.
	 */
	do {
		/* FIN and PSH may only remain on the final segment. */
		th->fin = th->psh = 0;

		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
				       (__force u32)delta));
		/* Without checksum offload compute the full checksum. */
		if (skb->ip_summed != CHECKSUM_PARTIAL)
			th->check =
			     csum_fold(csum_partial(skb_transport_header(skb),
						    thlen, skb->csum));

		seq += mss;
		skb = skb->next;
		th = tcp_hdr(skb);

		th->seq = htonl(seq);
		/* CWR is kept only on the first segment. */
		th->cwr = 0;
	} while (skb->next);

	/* The last segment may be shorter: use its real length. */
	delta = htonl(oldlen + (skb->tail - skb->transport_header) +
		      skb->data_len);
	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
				(__force u32)delta));
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		th->check = csum_fold(csum_partial(skb_transport_header(skb),
						   thlen, skb->csum));

out:
	return segs;
}
EXPORT_SYMBOL(tcp_tso_segment);
2743
/*
 * tcp_gro_receive - try to merge an incoming TCP segment into a held
 * GRO packet of the same flow.
 * @head: list of packets currently held for GRO
 * @skb:  newly received segment
 *
 * Returns a pointer to the list slot of a held packet that the code
 * below has selected via "pp = head", or NULL when none was selected.
 * NOTE(review): what the caller does with the returned slot is not
 * visible in this function.
 * NAPI_GRO_CB(skb)->flush is OR-ed with the local flush verdict.
 */
struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct tcphdr *th;
	struct tcphdr *th2;
	unsigned int len;
	unsigned int thlen;
	__be32 flags;
	unsigned int mss = 1;
	unsigned int hlen;
	unsigned int off;
	int flush = 1;
	int i;

	/* Get at the fixed TCP header, via the slow path if needed. */
	off = skb_gro_offset(skb);
	hlen = off + sizeof(*th);
	th = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		th = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!th))
			goto out;
	}

	thlen = th->doff * 4;
	if (thlen < sizeof(*th))
		goto out;

	/* Same again for the full header including options. */
	hlen = off + thlen;
	if (skb_gro_header_hard(skb, hlen)) {
		th = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!th))
			goto out;
	}

	skb_gro_pull(skb, thlen);

	len = skb_gro_len(skb);
	flags = tcp_flag_word(th);

	/* Look for a held packet of the same flow.  Source and
	 * destination port are compared in one 32-bit load starting
	 * at the source field.
	 */
	for (; (p = *head); head = &p->next) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		th2 = tcp_hdr(p);

		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		goto found;
	}

	/* No match: p is NULL here. */
	goto out_check_final;

found:
	/* Accumulate every reason the segment cannot be merged: the
	 * held packet asked for a flush, CWR is set, flags other than
	 * CWR/FIN/PSH differ, the ACK number differs, or any option
	 * word differs.
	 */
	flush = NAPI_GRO_CB(p)->flush;
	flush |= (__force int)(flags & TCP_FLAG_CWR);
	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
	flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
	for (i = sizeof(*th); i < thlen; i += 4)
		flush |= *(u32 *)((u8 *)th + i) ^
			 *(u32 *)((u8 *)th2 + i);

	mss = skb_shinfo(p)->gso_size;

	/* Also refuse empty or over-sized payloads (len - 1 wraps for
	 * len == 0) and segments that do not directly follow the held
	 * data in sequence space.
	 */
	flush |= (len - 1) >= mss;
	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);

	if (flush || skb_gro_receive(head, skb)) {
		mss = 1;
		goto out_check_final;
	}

	/* Merged: carry FIN/PSH over to the held packet's header. */
	p = *head;
	th2 = tcp_hdr(p);
	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

out_check_final:
	/* A short segment or one of these flags sets the flush verdict. */
	flush = len < mss;
	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
					TCP_FLAG_RST | TCP_FLAG_SYN |
					TCP_FLAG_FIN));

	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
		pp = head;

out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}
EXPORT_SYMBOL(tcp_gro_receive);
2839
2840int tcp_gro_complete(struct sk_buff *skb)
2841{
2842 struct tcphdr *th = tcp_hdr(skb);
2843
2844 skb->csum_start = skb_transport_header(skb) - skb->head;
2845 skb->csum_offset = offsetof(struct tcphdr, check);
2846 skb->ip_summed = CHECKSUM_PARTIAL;
2847
2848 skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
2849
2850 if (th->cwr)
2851 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
2852
2853 return 0;
2854}
2855EXPORT_SYMBOL(tcp_gro_complete);
2856
2857#ifdef CONFIG_TCP_MD5SIG
/* Number of current users of the MD5 pool; the pool is freed when this
 * drops to zero (see tcp_free_md5sig_pool()).
 */
static unsigned long tcp_md5sig_users;
/* The shared per-CPU MD5 pool, or NULL when none is allocated. */
static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
/* Protects tcp_md5sig_users and tcp_md5sig_pool. */
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
2861
2862static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
2863{
2864 int cpu;
2865
2866 for_each_possible_cpu(cpu) {
2867 struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
2868
2869 if (p->md5_desc.tfm)
2870 crypto_free_hash(p->md5_desc.tfm);
2871 }
2872 free_percpu(pool);
2873}
2874
2875void tcp_free_md5sig_pool(void)
2876{
2877 struct tcp_md5sig_pool __percpu *pool = NULL;
2878
2879 spin_lock_bh(&tcp_md5sig_pool_lock);
2880 if (--tcp_md5sig_users == 0) {
2881 pool = tcp_md5sig_pool;
2882 tcp_md5sig_pool = NULL;
2883 }
2884 spin_unlock_bh(&tcp_md5sig_pool_lock);
2885 if (pool)
2886 __tcp_free_md5sig_pool(pool);
2887}
2888EXPORT_SYMBOL(tcp_free_md5sig_pool);
2889
2890static struct tcp_md5sig_pool __percpu *
2891__tcp_alloc_md5sig_pool(struct sock *sk)
2892{
2893 int cpu;
2894 struct tcp_md5sig_pool __percpu *pool;
2895
2896 pool = alloc_percpu(struct tcp_md5sig_pool);
2897 if (!pool)
2898 return NULL;
2899
2900 for_each_possible_cpu(cpu) {
2901 struct crypto_hash *hash;
2902
2903 hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
2904 if (!hash || IS_ERR(hash))
2905 goto out_free;
2906
2907 per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
2908 }
2909 return pool;
2910out_free:
2911 __tcp_free_md5sig_pool(pool);
2912 return NULL;
2913}
2914
/*
 * tcp_alloc_md5sig_pool - register a user of the shared MD5 pool,
 * allocating the pool when this is the first user.
 * @sk: passed through to __tcp_alloc_md5sig_pool()
 *
 * Returns the shared pool, or NULL if it had to be allocated and the
 * allocation failed (the user count is rolled back in that case).
 */
struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
{
	struct tcp_md5sig_pool __percpu *pool;
	int alloc = 0;

retry:
	spin_lock_bh(&tcp_md5sig_pool_lock);
	pool = tcp_md5sig_pool;
	if (tcp_md5sig_users++ == 0) {
		/* First user: this caller allocates the pool. */
		alloc = 1;
		spin_unlock_bh(&tcp_md5sig_pool_lock);
	} else if (!pool) {
		/* There are users but no pool yet: undo our count and
		 * spin until the pool appears.
		 * NOTE(review): presumably another caller is in the
		 * middle of allocating it; confirm.
		 */
		tcp_md5sig_users--;
		spin_unlock_bh(&tcp_md5sig_pool_lock);
		cpu_relax();
		goto retry;
	} else
		spin_unlock_bh(&tcp_md5sig_pool_lock);

	if (alloc) {
		/* The allocation is done with the lock dropped. */
		struct tcp_md5sig_pool __percpu *p;

		p = __tcp_alloc_md5sig_pool(sk);
		spin_lock_bh(&tcp_md5sig_pool_lock);
		if (!p) {
			tcp_md5sig_users--;
			spin_unlock_bh(&tcp_md5sig_pool_lock);
			return NULL;
		}
		pool = tcp_md5sig_pool;
		if (pool) {
			/* A pool was installed in the meantime: keep it
			 * and discard the one just allocated.
			 */
			spin_unlock_bh(&tcp_md5sig_pool_lock);
			__tcp_free_md5sig_pool(p);
		} else {
			tcp_md5sig_pool = pool = p;
			spin_unlock_bh(&tcp_md5sig_pool_lock);
		}
	}
	return pool;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2968{
2969 struct tcp_md5sig_pool __percpu *p;
2970
2971 local_bh_disable();
2972
2973 spin_lock(&tcp_md5sig_pool_lock);
2974 p = tcp_md5sig_pool;
2975 if (p)
2976 tcp_md5sig_users++;
2977 spin_unlock(&tcp_md5sig_pool_lock);
2978
2979 if (p)
2980 return this_cpu_ptr(p);
2981
2982 local_bh_enable();
2983 return NULL;
2984}
2985EXPORT_SYMBOL(tcp_get_md5sig_pool);
2986
/*
 * tcp_put_md5sig_pool - counterpart of a successful
 * tcp_get_md5sig_pool(): re-enable bottom halves, then drop the pool
 * user reference.
 */
void tcp_put_md5sig_pool(void)
{
	local_bh_enable();
	tcp_free_md5sig_pool();
}
EXPORT_SYMBOL(tcp_put_md5sig_pool);
2993
2994int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2995 const struct tcphdr *th)
2996{
2997 struct scatterlist sg;
2998 struct tcphdr hdr;
2999 int err;
3000
3001
3002 memcpy(&hdr, th, sizeof(hdr));
3003 hdr.check = 0;
3004
3005
3006 sg_init_one(&sg, &hdr, sizeof(hdr));
3007 err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr));
3008 return err;
3009}
3010EXPORT_SYMBOL(tcp_md5_hash_header);
3011
3012int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3013 const struct sk_buff *skb, unsigned int header_len)
3014{
3015 struct scatterlist sg;
3016 const struct tcphdr *tp = tcp_hdr(skb);
3017 struct hash_desc *desc = &hp->md5_desc;
3018 unsigned i;
3019 const unsigned head_data_len = skb_headlen(skb) > header_len ?
3020 skb_headlen(skb) - header_len : 0;
3021 const struct skb_shared_info *shi = skb_shinfo(skb);
3022 struct sk_buff *frag_iter;
3023
3024 sg_init_table(&sg, 1);
3025
3026 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
3027 if (crypto_hash_update(desc, &sg, head_data_len))
3028 return 1;
3029
3030 for (i = 0; i < shi->nr_frags; ++i) {
3031 const struct skb_frag_struct *f = &shi->frags[i];
3032 struct page *page = skb_frag_page(f);
3033 sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
3034 if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
3035 return 1;
3036 }
3037
3038 skb_walk_frags(skb, frag_iter)
3039 if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
3040 return 1;
3041
3042 return 0;
3043}
3044EXPORT_SYMBOL(tcp_md5_hash_skb_data);
3045
3046int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
3047{
3048 struct scatterlist sg;
3049
3050 sg_init_one(&sg, key->key, key->keylen);
3051 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
3052}
3053EXPORT_SYMBOL(tcp_md5_hash_key);
3054
3055#endif
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
/* One generation of secret material used by tcp_cookie_generator(). */
struct tcp_cookie_secret {
	/* Secret words, COOKIE_WORKSPACE_WORDS of them.
	 * NOTE(review): tcp_cookie_work() indexes such a buffer at
	 * COOKIE_DIGEST_WORDS + n, which suggests a digest part followed
	 * by a message part; confirm against the cookie hashing code.
	 */
	u32				secrets[COOKIE_WORKSPACE_WORDS];
	/* Expiry time; compared against jiffies by the generator. */
	unsigned long			expires;
};
3090
/* Secret lifetimes in jiffies: one and two TCP_PAWS_MSL periods, and
 * the 600 second full lifetime.
 */
#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
#define TCP_SECRET_LIFE (HZ * 600)

/* Backing storage for the two secret generations. */
static struct tcp_cookie_secret tcp_secret_one;
static struct tcp_cookie_secret tcp_secret_two;

/* Pointers selecting which of the two secrets currently plays which
 * role.  tcp_cookie_generator() hands out from the "generating" one
 * and, on expiry, re-points "generating" and "retiring" with
 * rcu_assign_pointer().
 * NOTE(review): where these pointers are first initialised is not
 * visible in this part of the file.
 */
static struct tcp_cookie_secret *tcp_secret_generating;
static struct tcp_cookie_secret *tcp_secret_primary;
static struct tcp_cookie_secret *tcp_secret_retiring;
static struct tcp_cookie_secret *tcp_secret_secondary;

/* Serialises secret regeneration in tcp_cookie_generator(). */
static DEFINE_SPINLOCK(tcp_secret_locker);
3105
3106
3107
3108static inline u32 tcp_cookie_work(const u32 *ws, const int n)
3109{
3110 return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
3111}
3112
3113
3114
3115
3116
3117int tcp_cookie_generator(u32 *bakery)
3118{
3119 unsigned long jiffy = jiffies;
3120
3121 if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
3122 spin_lock_bh(&tcp_secret_locker);
3123 if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
3124
3125 memcpy(bakery,
3126 &tcp_secret_generating->secrets[0],
3127 COOKIE_WORKSPACE_WORDS);
3128 } else {
3129
3130 get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141 if (unlikely(tcp_secret_primary->expires ==
3142 tcp_secret_secondary->expires)) {
3143 struct timespec tv;
3144
3145 getnstimeofday(&tv);
3146 bakery[COOKIE_DIGEST_WORDS+0] ^=
3147 (u32)tv.tv_nsec;
3148
3149 tcp_secret_secondary->expires = jiffy
3150 + TCP_SECRET_1MSL
3151 + (0x0f & tcp_cookie_work(bakery, 0));
3152 } else {
3153 tcp_secret_secondary->expires = jiffy
3154 + TCP_SECRET_LIFE
3155 + (0xff & tcp_cookie_work(bakery, 1));
3156 tcp_secret_primary->expires = jiffy
3157 + TCP_SECRET_2MSL
3158 + (0x1f & tcp_cookie_work(bakery, 2));
3159 }
3160 memcpy(&tcp_secret_secondary->secrets[0],
3161 bakery, COOKIE_WORKSPACE_WORDS);
3162
3163 rcu_assign_pointer(tcp_secret_generating,
3164 tcp_secret_secondary);
3165 rcu_assign_pointer(tcp_secret_retiring,
3166 tcp_secret_primary);
3167
3168
3169
3170
3171
3172
3173 }
3174 spin_unlock_bh(&tcp_secret_locker);
3175 } else {
3176 rcu_read_lock_bh();
3177 memcpy(bakery,
3178 &rcu_dereference(tcp_secret_generating)->secrets[0],
3179 COOKIE_WORKSPACE_WORDS);
3180 rcu_read_unlock_bh();
3181 }
3182 return 0;
3183}
3184EXPORT_SYMBOL(tcp_cookie_generator);
3185
3186void tcp_done(struct sock *sk)
3187{
3188 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
3189 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
3190
3191 tcp_set_state(sk, TCP_CLOSE);
3192 tcp_clear_xmit_timers(sk);
3193
3194 sk->sk_shutdown = SHUTDOWN_MASK;
3195
3196 if (!sock_flag(sk, SOCK_DEAD))
3197 sk->sk_state_change(sk);
3198 else
3199 inet_csk_destroy_sock(sk);
3200}
3201EXPORT_SYMBOL_GPL(tcp_done);
3202
3203extern struct tcp_congestion_ops tcp_reno;
3204
3205static __initdata unsigned long thash_entries;
3206static int __init set_thash_entries(char *str)
3207{
3208 if (!str)
3209 return 0;
3210 thash_entries = simple_strtoul(str, &str, 0);
3211 return 1;
3212}
3213__setup("thash_entries=", set_thash_entries);
3214
3215void __init tcp_init(void)
3216{
3217 struct sk_buff *skb = NULL;
3218 unsigned long limit;
3219 int i, max_share, cnt;
3220 unsigned long jiffy = jiffies;
3221
3222 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
3223
3224 percpu_counter_init(&tcp_sockets_allocated, 0);
3225 percpu_counter_init(&tcp_orphan_count, 0);
3226 tcp_hashinfo.bind_bucket_cachep =
3227 kmem_cache_create("tcp_bind_bucket",
3228 sizeof(struct inet_bind_bucket), 0,
3229 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3230
3231
3232
3233
3234
3235
3236 tcp_hashinfo.ehash =
3237 alloc_large_system_hash("TCP established",
3238 sizeof(struct inet_ehash_bucket),
3239 thash_entries,
3240 (totalram_pages >= 128 * 1024) ?
3241 13 : 15,
3242 0,
3243 NULL,
3244 &tcp_hashinfo.ehash_mask,
3245 thash_entries ? 0 : 512 * 1024);
3246 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
3247 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
3248 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
3249 }
3250 if (inet_ehash_locks_alloc(&tcp_hashinfo))
3251 panic("TCP: failed to alloc ehash_locks");
3252 tcp_hashinfo.bhash =
3253 alloc_large_system_hash("TCP bind",
3254 sizeof(struct inet_bind_hashbucket),
3255 tcp_hashinfo.ehash_mask + 1,
3256 (totalram_pages >= 128 * 1024) ?
3257 13 : 15,
3258 0,
3259 &tcp_hashinfo.bhash_size,
3260 NULL,
3261 64 * 1024);
3262 tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
3263 for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
3264 spin_lock_init(&tcp_hashinfo.bhash[i].lock);
3265 INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
3266 }
3267
3268
3269 cnt = tcp_hashinfo.ehash_mask + 1;
3270
3271 tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
3272 sysctl_tcp_max_orphans = cnt / 2;
3273 sysctl_max_syn_backlog = max(128, cnt / 256);
3274
3275 limit = nr_free_buffer_pages() / 8;
3276 limit = max(limit, 128UL);
3277 sysctl_tcp_mem[0] = limit / 4 * 3;
3278 sysctl_tcp_mem[1] = limit;
3279 sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
3280
3281
3282 limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
3283 max_share = min(4UL*1024*1024, limit);
3284
3285 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
3286 sysctl_tcp_wmem[1] = 16*1024;
3287 sysctl_tcp_wmem[2] = max(64*1024, max_share);
3288
3289 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
3290 sysctl_tcp_rmem[1] = 87380;
3291 sysctl_tcp_rmem[2] = max(87380, max_share);
3292
3293 printk(KERN_INFO "TCP: Hash tables configured "
3294 "(established %u bind %u)\n",
3295 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
3296
3297 tcp_register_congestion_control(&tcp_reno);
3298
3299 memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
3300 memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
3301 tcp_secret_one.expires = jiffy;
3302 tcp_secret_two.expires = jiffy;
3303 tcp_secret_generating = &tcp_secret_one;
3304 tcp_secret_primary = &tcp_secret_one;
3305 tcp_secret_retiring = &tcp_secret_two;
3306 tcp_secret_secondary = &tcp_secret_two;
3307}
3308