/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 */
248#include <linux/kernel.h>
249#include <linux/module.h>
250#include <linux/types.h>
251#include <linux/fcntl.h>
252#include <linux/poll.h>
253#include <linux/init.h>
254#include <linux/fs.h>
255#include <linux/skbuff.h>
256#include <linux/scatterlist.h>
257#include <linux/splice.h>
258#include <linux/net.h>
259#include <linux/socket.h>
260#include <linux/random.h>
261#include <linux/bootmem.h>
262#include <linux/highmem.h>
263#include <linux/swap.h>
264#include <linux/cache.h>
265#include <linux/err.h>
266#include <linux/crypto.h>
267#include <linux/time.h>
268#include <linux/slab.h>
269
270#include <net/icmp.h>
271#include <net/tcp.h>
272#include <net/xfrm.h>
273#include <net/ip.h>
274#include <net/netdma.h>
275#include <net/sock.h>
276
277#include <asm/uaccess.h>
278#include <asm/ioctls.h>
279
280int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
281
282struct percpu_counter tcp_orphan_count;
283EXPORT_SYMBOL_GPL(tcp_orphan_count);
284
285int sysctl_tcp_wmem[3] __read_mostly;
286int sysctl_tcp_rmem[3] __read_mostly;
287
288EXPORT_SYMBOL(sysctl_tcp_rmem);
289EXPORT_SYMBOL(sysctl_tcp_wmem);
290
291atomic_long_t tcp_memory_allocated;
292EXPORT_SYMBOL(tcp_memory_allocated);
293
/*
 * Current number of TCP sockets.
 */
297struct percpu_counter tcp_sockets_allocated;
298EXPORT_SYMBOL(tcp_sockets_allocated);
299
/*
 * TCP splice context
 */
303struct tcp_splice_state {
304 struct pipe_inode_info *pipe;
305 size_t len;
306 unsigned int flags;
307};
308
/*
 * Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically
 * without any serialization; the flag is advisory only.
 */
315int tcp_memory_pressure __read_mostly;
316EXPORT_SYMBOL(tcp_memory_pressure);
317
318void tcp_enter_memory_pressure(struct sock *sk)
319{
320 if (!tcp_memory_pressure) {
321 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
322 tcp_memory_pressure = 1;
323 }
324}
325EXPORT_SYMBOL(tcp_enter_memory_pressure);
326
/* Convert seconds to retransmits based on initial and max timeout */
328static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
329{
330 u8 res = 0;
331
332 if (seconds > 0) {
333 int period = timeout;
334
335 res = 1;
336 while (seconds > period && res < 255) {
337 res++;
338 timeout <<= 1;
339 if (timeout > rto_max)
340 timeout = rto_max;
341 period += timeout;
342 }
343 }
344 return res;
345}
346
/* Convert retransmits to seconds based on initial and max timeout */
348static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
349{
350 int period = 0;
351
352 if (retrans > 0) {
353 period = timeout;
354 while (--retrans) {
355 timeout <<= 1;
356 if (timeout > rto_max)
357 timeout = rto_max;
358 period += timeout;
359 }
360 }
361 return period;
362}
363
/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
371unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
372{
373 unsigned int mask;
374 struct sock *sk = sock->sk;
375 const struct tcp_sock *tp = tcp_sk(sk);
376
377 sock_poll_wait(file, sk_sleep(sk), wait);
378 if (sk->sk_state == TCP_LISTEN)
379 return inet_csk_listen_poll(sk);
380
	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

386 mask = 0;
	/*
	 * POLLHUP is tricky: poll() has no notion of a half-closed
	 * connection, so POLLHUP is only reported once the socket is
	 * shut down in both directions or has reached TCP_CLOSE; a
	 * receive-side shutdown alone is reported as POLLIN |
	 * POLLRDNORM | POLLRDHUP so readers see EOF while writers may
	 * continue.
	 */
415 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
416 mask |= POLLHUP;
417 if (sk->sk_shutdown & RCV_SHUTDOWN)
418 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
419
	/* Connected? */
421 if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
422 int target = sock_rcvlowat(sk, 0, INT_MAX);
423
424 if (tp->urg_seq == tp->copied_seq &&
425 !sock_flag(sk, SOCK_URGINLINE) &&
426 tp->urg_data)
427 target++;
428
		/* Potential race condition. If read of tp below will
		 * escape above sk->sk_state, we can be illegally awaken
		 * in SYN_* states. */
432 if (tp->rcv_nxt - tp->copied_seq >= target)
433 mask |= POLLIN | POLLRDNORM;
434
435 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
436 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
437 mask |= POLLOUT | POLLWRNORM;
438 } else {
439 set_bit(SOCK_ASYNC_NOSPACE,
440 &sk->sk_socket->flags);
441 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
442
				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
447 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
448 mask |= POLLOUT | POLLWRNORM;
449 }
450 } else
451 mask |= POLLOUT | POLLWRNORM;
452
453 if (tp->urg_data & TCP_URG_VALID)
454 mask |= POLLPRI;
455 }
456
457 smp_rmb();
458 if (sk->sk_err)
459 mask |= POLLERR;
460
461 return mask;
462}
463EXPORT_SYMBOL(tcp_poll);
464
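/*
 *	tcp_ioctl() answers the classic socket ioctls: SIOCINQ (bytes
 *	readable), SIOCATMARK (at the urgent mark), SIOCOUTQ (unsent +
 *	unacknowledged bytes) and SIOCOUTQNSD (bytes not yet sent).
 */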
465int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
466{
467 struct tcp_sock *tp = tcp_sk(sk);
468 int answ;
469
470 switch (cmd) {
471 case SIOCINQ:
472 if (sk->sk_state == TCP_LISTEN)
473 return -EINVAL;
474
475 lock_sock(sk);
476 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
477 answ = 0;
478 else if (sock_flag(sk, SOCK_URGINLINE) ||
479 !tp->urg_data ||
480 before(tp->urg_seq, tp->copied_seq) ||
481 !before(tp->urg_seq, tp->rcv_nxt)) {
482 struct sk_buff *skb;
483
484 answ = tp->rcv_nxt - tp->copied_seq;
485
			/* Subtract 1, if FIN is in queue. */
487 skb = skb_peek_tail(&sk->sk_receive_queue);
488 if (answ && skb)
489 answ -= tcp_hdr(skb)->fin;
490 } else
491 answ = tp->urg_seq - tp->copied_seq;
492 release_sock(sk);
493 break;
494 case SIOCATMARK:
495 answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
496 break;
497 case SIOCOUTQ:
498 if (sk->sk_state == TCP_LISTEN)
499 return -EINVAL;
500
501 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
502 answ = 0;
503 else
504 answ = tp->write_seq - tp->snd_una;
505 break;
506 case SIOCOUTQNSD:
507 if (sk->sk_state == TCP_LISTEN)
508 return -EINVAL;
509
510 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
511 answ = 0;
512 else
513 answ = tp->write_seq - tp->snd_nxt;
514 break;
515 default:
516 return -ENOIOCTLCMD;
517 }
518
519 return put_user(answ, (int __user *)arg);
520}
521EXPORT_SYMBOL(tcp_ioctl);
522
523static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
524{
525 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
526 tp->pushed_seq = tp->write_seq;
527}
528
529static inline int forced_push(const struct tcp_sock *tp)
530{
531 return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
532}
533
534static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
535{
536 struct tcp_sock *tp = tcp_sk(sk);
537 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
538
539 skb->csum = 0;
540 tcb->seq = tcb->end_seq = tp->write_seq;
541 tcb->tcp_flags = TCPHDR_ACK;
542 tcb->sacked = 0;
543 skb_header_release(skb);
544 tcp_add_write_queue_tail(sk, skb);
545 sk->sk_wmem_queued += skb->truesize;
546 sk_mem_charge(sk, skb->truesize);
547 if (tp->nonagle & TCP_NAGLE_PUSH)
548 tp->nonagle &= ~TCP_NAGLE_PUSH;
549}
550
551static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
552{
553 if (flags & MSG_OOB)
554 tp->snd_up = tp->write_seq;
555}
556
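/* Push pending frames out if anything is queued, honouring MSG_MORE,
 * MSG_OOB and the forced-push heuristic above.
 */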
557static inline void tcp_push(struct sock *sk, int flags, int mss_now,
558 int nonagle)
559{
560 if (tcp_send_head(sk)) {
561 struct tcp_sock *tp = tcp_sk(sk);
562
563 if (!(flags & MSG_MORE) || forced_push(tp))
564 tcp_mark_push(tp, tcp_write_queue_tail(sk));
565
566 tcp_mark_urg(tp, flags);
567 __tcp_push_pending_frames(sk, mss_now,
568 (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
569 }
570}
571
572static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
573 unsigned int offset, size_t len)
574{
575 struct tcp_splice_state *tss = rd_desc->arg.data;
576 int ret;
577
578 ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
579 tss->flags);
580 if (ret > 0)
581 rd_desc->count -= ret;
582 return ret;
583}
584
585static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
{
	/* Store TCP splice context information in read_descriptor_t. */
588 read_descriptor_t rd_desc = {
589 .arg.data = tss,
590 .count = tss->len,
591 };
592
593 return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
594}
595
/**
 *  tcp_splice_read - splice data from TCP socket to a pipe
 * @sock:	socket to splice from
 * @ppos:	position (obsolete)
 * @pipe:	pipe to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will read pages from given socket and fill them into a pipe.
 *
 **/
608ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
609 struct pipe_inode_info *pipe, size_t len,
610 unsigned int flags)
611{
612 struct sock *sk = sock->sk;
613 struct tcp_splice_state tss = {
614 .pipe = pipe,
615 .len = len,
616 .flags = flags,
617 };
618 long timeo;
619 ssize_t spliced;
620 int ret;
621
622 sock_rps_record_flow(sk);
623
	/*
	 * We can't seek on a socket input
	 */
626 if (unlikely(*ppos))
627 return -ESPIPE;
628
629 ret = spliced = 0;
630
631 lock_sock(sk);
632
633 timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
634 while (tss.len) {
635 ret = __tcp_splice_read(sk, &tss);
636 if (ret < 0)
637 break;
638 else if (!ret) {
639 if (spliced)
640 break;
641 if (sock_flag(sk, SOCK_DONE))
642 break;
643 if (sk->sk_err) {
644 ret = sock_error(sk);
645 break;
646 }
647 if (sk->sk_shutdown & RCV_SHUTDOWN)
648 break;
649 if (sk->sk_state == TCP_CLOSE) {
				/*
				 * This occurs when user tries to read
				 * from never connected socket.
				 */
654 if (!sock_flag(sk, SOCK_DONE))
655 ret = -ENOTCONN;
656 break;
657 }
658 if (!timeo) {
659 ret = -EAGAIN;
660 break;
661 }
662 sk_wait_data(sk, &timeo);
663 if (signal_pending(current)) {
664 ret = sock_intr_errno(timeo);
665 break;
666 }
667 continue;
668 }
669 tss.len -= ret;
670 spliced += ret;
671
672 if (!timeo)
673 break;
674 release_sock(sk);
675 lock_sock(sk);
676
677 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
678 (sk->sk_shutdown & RCV_SHUTDOWN) ||
679 signal_pending(current))
680 break;
681 }
682
683 release_sock(sk);
684
685 if (spliced)
686 return spliced;
687
688 return ret;
689}
690EXPORT_SYMBOL(tcp_splice_read);
691
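/* Allocate a new skb for the write queue and charge it against the
 * socket's send-buffer accounting; on failure, enter memory pressure
 * and moderate the send buffer.
 */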
692struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
693{
694 struct sk_buff *skb;
695
	/* The TCP header must be at least 32-bit aligned. */
697 size = ALIGN(size, 4);
698
699 skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
700 if (skb) {
701 if (sk_wmem_schedule(sk, skb->truesize)) {
			/*
			 * Make sure that we have exactly size bytes
			 * available to the caller, no more, no less.
			 */
706 skb_reserve(skb, skb_tailroom(skb) - size);
707 return skb;
708 }
709 __kfree_skb(skb);
710 } else {
711 sk->sk_prot->enter_memory_pressure(sk);
712 sk_stream_moderate_sndbuf(sk);
713 }
714 return NULL;
715}
716
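/* Compute the per-skb size goal for transmission: mss_now for non-GSO
 * sockets, otherwise a multiple of the MSS bounded by the device GSO
 * limit and half the maximum window.
 */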
717static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
718 int large_allowed)
719{
720 struct tcp_sock *tp = tcp_sk(sk);
721 u32 xmit_size_goal, old_size_goal;
722
723 xmit_size_goal = mss_now;
724
725 if (large_allowed && sk_can_gso(sk)) {
726 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
727 inet_csk(sk)->icsk_af_ops->net_header_len -
728 inet_csk(sk)->icsk_ext_hdr_len -
729 tp->tcp_header_len);
730
731 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
732
		/* We try hard to avoid divides here */
734 old_size_goal = tp->xmit_size_goal_segs * mss_now;
735
736 if (likely(old_size_goal <= xmit_size_goal &&
737 old_size_goal + mss_now > xmit_size_goal)) {
738 xmit_size_goal = old_size_goal;
739 } else {
740 tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
741 xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
742 }
743 }
744
745 return max(xmit_size_goal, mss_now);
746}
747
748static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
749{
750 int mss_now;
751
752 mss_now = tcp_current_mss(sk);
753 *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
754
755 return mss_now;
756}
757
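/* Core of tcp_sendpage(): attach page fragments from @pages to the
 * write queue without copying the payload, pushing segments out as
 * they fill up to the size goal.
 */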
758static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
759 size_t psize, int flags)
760{
761 struct tcp_sock *tp = tcp_sk(sk);
762 int mss_now, size_goal;
763 int err;
764 ssize_t copied;
765 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
766
	/* Wait for a connection to finish. */
768 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
769 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
770 goto out_err;
771
772 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
773
774 mss_now = tcp_send_mss(sk, &size_goal, flags);
775 copied = 0;
776
777 err = -EPIPE;
778 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
779 goto out_err;
780
781 while (psize > 0) {
782 struct sk_buff *skb = tcp_write_queue_tail(sk);
783 struct page *page = pages[poffset / PAGE_SIZE];
784 int copy, i, can_coalesce;
785 int offset = poffset % PAGE_SIZE;
786 int size = min_t(size_t, psize, PAGE_SIZE - offset);
787
788 if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
789new_segment:
790 if (!sk_stream_memory_free(sk))
791 goto wait_for_sndbuf;
792
793 skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
794 if (!skb)
795 goto wait_for_memory;
796
797 skb_entail(sk, skb);
798 copy = size_goal;
799 }
800
801 if (copy > size)
802 copy = size;
803
804 i = skb_shinfo(skb)->nr_frags;
805 can_coalesce = skb_can_coalesce(skb, i, page, offset);
806 if (!can_coalesce && i >= MAX_SKB_FRAGS) {
807 tcp_mark_push(tp, skb);
808 goto new_segment;
809 }
810 if (!sk_wmem_schedule(sk, copy))
811 goto wait_for_memory;
812
813 if (can_coalesce) {
814 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
815 } else {
816 get_page(page);
817 skb_fill_page_desc(skb, i, page, offset, copy);
818 }
819
820 skb->len += copy;
821 skb->data_len += copy;
822 skb->truesize += copy;
823 sk->sk_wmem_queued += copy;
824 sk_mem_charge(sk, copy);
825 skb->ip_summed = CHECKSUM_PARTIAL;
826 tp->write_seq += copy;
827 TCP_SKB_CB(skb)->end_seq += copy;
828 skb_shinfo(skb)->gso_segs = 0;
829
830 if (!copied)
831 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
832
833 copied += copy;
834 poffset += copy;
835 if (!(psize -= copy))
836 goto out;
837
838 if (skb->len < size_goal || (flags & MSG_OOB))
839 continue;
840
841 if (forced_push(tp)) {
842 tcp_mark_push(tp, skb);
843 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
844 } else if (skb == tcp_send_head(sk))
845 tcp_push_one(sk, mss_now);
846 continue;
847
848wait_for_sndbuf:
849 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
850wait_for_memory:
851 if (copied)
852 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
853
854 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
855 goto do_error;
856
857 mss_now = tcp_send_mss(sk, &size_goal, flags);
858 }
859
860out:
861 if (copied)
862 tcp_push(sk, flags, mss_now, tp->nonagle);
863 return copied;
864
865do_error:
866 if (copied)
867 goto out;
868out_err:
869 return sk_stream_error(sk, flags, err);
870}
871
872int tcp_sendpage(struct sock *sk, struct page *page, int offset,
873 size_t size, int flags)
874{
875 ssize_t res;
876
877 if (!(sk->sk_route_caps & NETIF_F_SG) ||
878 !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
879 return sock_no_sendpage(sk->sk_socket, page, offset, size,
880 flags);
881
882 lock_sock(sk);
883 res = do_tcp_sendpages(sk, &page, offset, size, flags);
884 release_sock(sk);
885 return res;
886}
887EXPORT_SYMBOL(tcp_sendpage);
888
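/* Pick the amount of linear (header) space to allocate for a new skb
 * in tcp_sendmsg(), depending on whether the route supports
 * scatter-gather and GSO.
 */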
889static inline int select_size(const struct sock *sk, bool sg)
890{
891 const struct tcp_sock *tp = tcp_sk(sk);
892 int tmp = tp->mss_cache;
893
894 if (sg) {
895 if (sk_can_gso(sk)) {
			/* Small frames won't use a full page:
			 * payload will immediately follow the tcp header.
			 */
899 tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
900 } else {
901 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
902
903 if (tmp >= pgbreak &&
904 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
905 tmp = pgbreak;
906 }
907 }
908
909 return tmp;
910}
911
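/* Copy data from the user iovec into the socket write queue and
 * (possibly) transmit it, coalescing into page fragments when the
 * device supports scatter-gather.
 */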
912int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
913 size_t size)
914{
915 struct iovec *iov;
916 struct tcp_sock *tp = tcp_sk(sk);
917 struct sk_buff *skb;
918 int iovlen, flags, err, copied;
919 int mss_now, size_goal;
920 bool sg;
921 long timeo;
922
923 lock_sock(sk);
924
925 flags = msg->msg_flags;
926 timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
927
	/* Wait for a connection to finish. */
929 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
930 if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
931 goto out_err;
932
933
934 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
935
936 mss_now = tcp_send_mss(sk, &size_goal, flags);
937
938
939 iovlen = msg->msg_iovlen;
940 iov = msg->msg_iov;
941 copied = 0;
942
943 err = -EPIPE;
944 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
945 goto out_err;
946
947 sg = !!(sk->sk_route_caps & NETIF_F_SG);
948
949 while (--iovlen >= 0) {
950 size_t seglen = iov->iov_len;
951 unsigned char __user *from = iov->iov_base;
952
953 iov++;
954
955 while (seglen > 0) {
956 int copy = 0;
957 int max = size_goal;
958
959 skb = tcp_write_queue_tail(sk);
960 if (tcp_send_head(sk)) {
961 if (skb->ip_summed == CHECKSUM_NONE)
962 max = mss_now;
963 copy = max - skb->len;
964 }
965
966 if (copy <= 0) {
967new_segment:
				/* Allocate new segment. If the interface is SG,
				 * allocate skb fitting to single page.
				 */
971 if (!sk_stream_memory_free(sk))
972 goto wait_for_sndbuf;
973
974 skb = sk_stream_alloc_skb(sk,
975 select_size(sk, sg),
976 sk->sk_allocation);
977 if (!skb)
978 goto wait_for_memory;
979
				/*
				 * Check whether we can use HW checksum.
				 */
983 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
984 skb->ip_summed = CHECKSUM_PARTIAL;
985
986 skb_entail(sk, skb);
987 copy = size_goal;
988 max = size_goal;
989 }
990
991
992 if (copy > seglen)
993 copy = seglen;
994
995
996 if (skb_tailroom(skb) > 0) {
997
998 if (copy > skb_tailroom(skb))
999 copy = skb_tailroom(skb);
1000 err = skb_add_data_nocache(sk, skb, from, copy);
1001 if (err)
1002 goto do_fault;
1003 } else {
1004 int merge = 0;
1005 int i = skb_shinfo(skb)->nr_frags;
1006 struct page *page = sk->sk_sndmsg_page;
1007 int off;
1008
1009 if (page && page_count(page) == 1)
1010 sk->sk_sndmsg_off = 0;
1011
1012 off = sk->sk_sndmsg_off;
1013
1014 if (skb_can_coalesce(skb, i, page, off) &&
1015 off != PAGE_SIZE) {
					/* We can extend the last page
					 * fragment. */
1018 merge = 1;
1019 } else if (i == MAX_SKB_FRAGS || !sg) {
					/* Need to add new fragment and cannot
					 * do this because interface is non-SG,
					 * or because all the page slots are
					 * busy. */
1024 tcp_mark_push(tp, skb);
1025 goto new_segment;
1026 } else if (page) {
1027 if (off == PAGE_SIZE) {
1028 put_page(page);
1029 sk->sk_sndmsg_page = page = NULL;
1030 off = 0;
1031 }
1032 } else
1033 off = 0;
1034
1035 if (copy > PAGE_SIZE - off)
1036 copy = PAGE_SIZE - off;
1037
1038 if (!sk_wmem_schedule(sk, copy))
1039 goto wait_for_memory;
1040
1041 if (!page) {
1042
1043 if (!(page = sk_stream_alloc_page(sk)))
1044 goto wait_for_memory;
1045 }
1046
1047
1048
1049 err = skb_copy_to_page_nocache(sk, from, skb,
1050 page, off, copy);
1051 if (err) {
					/* If this page was new, give it to the
					 * socket so it does not get leaked.
					 */
1055 if (!sk->sk_sndmsg_page) {
1056 sk->sk_sndmsg_page = page;
1057 sk->sk_sndmsg_off = 0;
1058 }
1059 goto do_error;
1060 }
1061
1062
1063 if (merge) {
1064 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1065 } else {
1066 skb_fill_page_desc(skb, i, page, off, copy);
1067 if (sk->sk_sndmsg_page) {
1068 get_page(page);
1069 } else if (off + copy < PAGE_SIZE) {
1070 get_page(page);
1071 sk->sk_sndmsg_page = page;
1072 }
1073 }
1074
1075 sk->sk_sndmsg_off = off + copy;
1076 }
1077
1078 if (!copied)
1079 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
1080
1081 tp->write_seq += copy;
1082 TCP_SKB_CB(skb)->end_seq += copy;
1083 skb_shinfo(skb)->gso_segs = 0;
1084
1085 from += copy;
1086 copied += copy;
1087 if ((seglen -= copy) == 0 && iovlen == 0)
1088 goto out;
1089
1090 if (skb->len < max || (flags & MSG_OOB))
1091 continue;
1092
1093 if (forced_push(tp)) {
1094 tcp_mark_push(tp, skb);
1095 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
1096 } else if (skb == tcp_send_head(sk))
1097 tcp_push_one(sk, mss_now);
1098 continue;
1099
1100wait_for_sndbuf:
1101 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1102wait_for_memory:
1103 if (copied)
1104 tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
1105
1106 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1107 goto do_error;
1108
1109 mss_now = tcp_send_mss(sk, &size_goal, flags);
1110 }
1111 }
1112
1113out:
1114 if (copied)
1115 tcp_push(sk, flags, mss_now, tp->nonagle);
1116 release_sock(sk);
1117 return copied;
1118
1119do_fault:
1120 if (!skb->len) {
1121 tcp_unlink_write_queue(skb, sk);
		/* It is the one place in all of TCP, except connection
		 * reset, where we can be unlinking the send_head.
		 */
1125 tcp_check_send_head(sk, skb);
1126 sk_wmem_free_skb(sk, skb);
1127 }
1128
1129do_error:
1130 if (copied)
1131 goto out;
1132out_err:
1133 err = sk_stream_error(sk, flags, err);
1134 release_sock(sk);
1135 return err;
1136}
1137EXPORT_SYMBOL(tcp_sendmsg);
1138
/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this, no blocking and very strange errors 8)
 */

1144static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1145{
1146 struct tcp_sock *tp = tcp_sk(sk);
1147
	/* No URG data to read. */
1149 if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
1150 tp->urg_data == TCP_URG_READ)
1151 return -EINVAL;
1152
1153 if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
1154 return -ENOTCONN;
1155
1156 if (tp->urg_data & TCP_URG_VALID) {
1157 int err = 0;
1158 char c = tp->urg_data;
1159
1160 if (!(flags & MSG_PEEK))
1161 tp->urg_data = TCP_URG_READ;
1162
1163
1164 msg->msg_flags |= MSG_OOB;
1165
1166 if (len > 0) {
1167 if (!(flags & MSG_TRUNC))
1168 err = memcpy_toiovec(msg->msg_iov, &c, 1);
1169 len = 1;
1170 } else
1171 msg->msg_flags |= MSG_TRUNC;
1172
1173 return err ? -EFAULT : len;
1174 }
1175
1176 if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
1177 return 0;
1178
	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
	 */
1185 return -EAGAIN;
1186}
1187
/* Clean up the receive buffer for full frames taken by the user,
 * then send an ACK if necessary.  COPIED is the number of bytes
 * tcp_recvmsg has given to the user so far, it speeds up the
 * calculation of whether or not we must ACK for the sake of
 * a window update.
 */
1194void tcp_cleanup_rbuf(struct sock *sk, int copied)
1195{
1196 struct tcp_sock *tp = tcp_sk(sk);
1197 int time_to_ack = 0;
1198
1199 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
1200
1201 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
1202 "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
1203 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
1204
1205 if (inet_csk_ack_scheduled(sk)) {
1206 const struct inet_connection_sock *icsk = inet_csk(sk);
1207
1208
1209 if (icsk->icsk_ack.blocked ||
1210
1211 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
		    /*
		     * If this read emptied read buffer, we send ACK, if
		     * connection is not bidirectional, user drained
		     * receive buffer and there was a small segment
		     * in queue.
		     */
1218 (copied > 0 &&
1219 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
1220 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
1221 !icsk->icsk_ack.pingpong)) &&
1222 !atomic_read(&sk->sk_rmem_alloc)))
1223 time_to_ack = 1;
1224 }
1225
	/* We send an ACK if we can now advertise a non-zero window
	 * which has been raised "significantly".
	 *
	 * Even if window raised up to infinity, do not send window open ACK
	 * in states, where we will not receive more. It is useless.
	 */
1232 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1233 __u32 rcv_window_now = tcp_receive_window(tp);
1234
1235
1236 if (2*rcv_window_now <= tp->window_clamp) {
1237 __u32 new_window = __tcp_select_window(sk);
1238
			/* Send ACK now, if this read freed lots of space
			 * in our buffer. Certainly, new_window is new window.
			 * We can advertise it now, if it is not less than
			 * the current one.  "Lots" means "at least twice" here.
			 */
1244 if (new_window && new_window >= 2 * rcv_window_now)
1245 time_to_ack = 1;
1246 }
1247 }
1248 if (time_to_ack)
1249 tcp_send_ack(sk);
1250}
1251
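/* Process every segment sitting on the prequeue through the normal
 * receive path (with BHs disabled), then reset the prequeue memory
 * accounting.
 */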
1252static void tcp_prequeue_process(struct sock *sk)
1253{
1254 struct sk_buff *skb;
1255 struct tcp_sock *tp = tcp_sk(sk);
1256
1257 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
1258
	/* RX process wants to run with disabled BHs, though it is not
	 * necessary */
1261 local_bh_disable();
1262 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1263 sk_backlog_rcv(sk, skb);
1264 local_bh_enable();
1265
1266
1267 tp->ucopy.memory = 0;
1268}
1269
1270#ifdef CONFIG_NET_DMA
1271static void tcp_service_net_dma(struct sock *sk, bool wait)
1272{
1273 dma_cookie_t done, used;
1274 dma_cookie_t last_issued;
1275 struct tcp_sock *tp = tcp_sk(sk);
1276
1277 if (!tp->ucopy.dma_chan)
1278 return;
1279
1280 last_issued = tp->ucopy.dma_cookie;
1281 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1282
1283 do {
1284 if (dma_async_memcpy_complete(tp->ucopy.dma_chan,
1285 last_issued, &done,
1286 &used) == DMA_SUCCESS) {
			/* Safe to free early-copied skbs now */
1289 break;
1290 } else {
1291 struct sk_buff *skb;
1292 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1293 (dma_async_is_complete(skb->dma_cookie, done,
1294 used) == DMA_SUCCESS)) {
1295 __skb_dequeue(&sk->sk_async_wait_queue);
1296 kfree_skb(skb);
1297 }
1298 }
1299 } while (wait);
1300}
1301#endif
1302
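/* Find the skb in the receive queue that contains sequence number
 * @seq and return the offset of @seq within it.
 */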
1303static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1304{
1305 struct sk_buff *skb;
1306 u32 offset;
1307
1308 skb_queue_walk(&sk->sk_receive_queue, skb) {
1309 offset = seq - TCP_SKB_CB(skb)->seq;
1310 if (tcp_hdr(skb)->syn)
1311 offset--;
1312 if (offset < skb->len || tcp_hdr(skb)->fin) {
1313 *off = offset;
1314 return skb;
1315 }
1316 }
1317 return NULL;
1318}
1319
/*
 * This routine provides an alternative to tcp_recvmsg() for routines
 * that would like to handle copying from skbuffs directly in 'sendfile'
 * fashion.
 * Note:
 *	- It is assumed that the socket was locked by the caller.
 *	- The routine does not block.
 *	- At present, there is no support for reading OOB data
 *	  or for 'peeking' the socket using this routine
 *	  (although both would be easy to implement).
 */
1331int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1332 sk_read_actor_t recv_actor)
1333{
1334 struct sk_buff *skb;
1335 struct tcp_sock *tp = tcp_sk(sk);
1336 u32 seq = tp->copied_seq;
1337 u32 offset;
1338 int copied = 0;
1339
1340 if (sk->sk_state == TCP_LISTEN)
1341 return -ENOTCONN;
1342 while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
1343 if (offset < skb->len) {
1344 int used;
1345 size_t len;
1346
1347 len = skb->len - offset;
1348
1349 if (tp->urg_data) {
1350 u32 urg_offset = tp->urg_seq - seq;
1351 if (urg_offset < len)
1352 len = urg_offset;
1353 if (!len)
1354 break;
1355 }
1356 used = recv_actor(desc, skb, offset, len);
1357 if (used < 0) {
1358 if (!copied)
1359 copied = used;
1360 break;
1361 } else if (used <= len) {
1362 seq += used;
1363 copied += used;
1364 offset += used;
1365 }
			/*
			 * If recv_actor drops the lock (e.g. TCP splice
			 * receive) the skb pointer might be invalid when
			 * getting here: tcp_collapse might have deleted it
			 * while aggregating skbs from the socket queue.
			 */
1372 skb = tcp_recv_skb(sk, seq-1, &offset);
1373 if (!skb || (offset+1 != skb->len))
1374 break;
1375 }
1376 if (tcp_hdr(skb)->fin) {
1377 sk_eat_skb(sk, skb, 0);
1378 ++seq;
1379 break;
1380 }
1381 sk_eat_skb(sk, skb, 0);
1382 if (!desc->count)
1383 break;
1384 tp->copied_seq = seq;
1385 }
1386 tp->copied_seq = seq;
1387
1388 tcp_rcv_space_adjust(sk);
1389
	/* Clean up data we have read: This will do ACK frames. */
1391 if (copied > 0)
1392 tcp_cleanup_rbuf(sk, copied);
1393 return copied;
1394}
1395EXPORT_SYMBOL(tcp_read_sock);
1396
/*
 *	This routine copies from a sock struct into the user buffer.
 *
 *	Technical note: in 2.3 we work on _locked_ socket, so that
 *	tricks with *seq access order and skb->users are not required.
 *	Probably, code can be easily improved even more.
 */

1405int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1406 size_t len, int nonblock, int flags, int *addr_len)
1407{
1408 struct tcp_sock *tp = tcp_sk(sk);
1409 int copied = 0;
1410 u32 peek_seq;
1411 u32 *seq;
1412 unsigned long used;
1413 int err;
1414 int target;
1415 long timeo;
1416 struct task_struct *user_recv = NULL;
1417 int copied_early = 0;
1418 struct sk_buff *skb;
1419 u32 urg_hole = 0;
1420
1421 lock_sock(sk);
1422
1423 err = -ENOTCONN;
1424 if (sk->sk_state == TCP_LISTEN)
1425 goto out;
1426
1427 timeo = sock_rcvtimeo(sk, nonblock);
1428
	/* Urgent data needs to be handled specially. */
1430 if (flags & MSG_OOB)
1431 goto recv_urg;
1432
1433 seq = &tp->copied_seq;
1434 if (flags & MSG_PEEK) {
1435 peek_seq = tp->copied_seq;
1436 seq = &peek_seq;
1437 }
1438
1439 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1440
1441#ifdef CONFIG_NET_DMA
1442 tp->ucopy.dma_chan = NULL;
1443 preempt_disable();
1444 skb = skb_peek_tail(&sk->sk_receive_queue);
1445 {
1446 int available = 0;
1447
1448 if (skb)
1449 available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
1450 if ((available < target) &&
1451 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1452 !sysctl_tcp_low_latency &&
1453 dma_find_channel(DMA_MEMCPY)) {
1454 preempt_enable_no_resched();
1455 tp->ucopy.pinned_list =
1456 dma_pin_iovec_pages(msg->msg_iov, len);
1457 } else {
1458 preempt_enable_no_resched();
1459 }
1460 }
1461#endif
1462
1463 do {
1464 u32 offset;
1465
		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
1467 if (tp->urg_data && tp->urg_seq == *seq) {
1468 if (copied)
1469 break;
1470 if (signal_pending(current)) {
1471 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
1472 break;
1473 }
1474 }
1475
1476
1477
1478 skb_queue_walk(&sk->sk_receive_queue, skb) {
			/* Now that we have two receive queues this
			 * shouldn't happen.
			 */
1482 if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
1483 "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
1484 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
1485 flags))
1486 break;
1487
1488 offset = *seq - TCP_SKB_CB(skb)->seq;
1489 if (tcp_hdr(skb)->syn)
1490 offset--;
1491 if (offset < skb->len)
1492 goto found_ok_skb;
1493 if (tcp_hdr(skb)->fin)
1494 goto found_fin_ok;
1495 WARN(!(flags & MSG_PEEK),
1496 "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
1497 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
1498 }
1499
		/* Well, if we have backlog, try to process it now yet. */
1501
1502 if (copied >= target && !sk->sk_backlog.tail)
1503 break;
1504
1505 if (copied) {
1506 if (sk->sk_err ||
1507 sk->sk_state == TCP_CLOSE ||
1508 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1509 !timeo ||
1510 signal_pending(current))
1511 break;
1512 } else {
1513 if (sock_flag(sk, SOCK_DONE))
1514 break;
1515
1516 if (sk->sk_err) {
1517 copied = sock_error(sk);
1518 break;
1519 }
1520
1521 if (sk->sk_shutdown & RCV_SHUTDOWN)
1522 break;
1523
1524 if (sk->sk_state == TCP_CLOSE) {
1525 if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
1529 copied = -ENOTCONN;
1530 break;
1531 }
1532 break;
1533 }
1534
1535 if (!timeo) {
1536 copied = -EAGAIN;
1537 break;
1538 }
1539
1540 if (signal_pending(current)) {
1541 copied = sock_intr_errno(timeo);
1542 break;
1543 }
1544 }
1545
1546 tcp_cleanup_rbuf(sk, copied);
1547
1548 if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
			/* Install new reader */
1550 if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
1551 user_recv = current;
1552 tp->ucopy.task = user_recv;
1553 tp->ucopy.iov = msg->msg_iov;
1554 }
1555
1556 tp->ucopy.len = len;
1557
1558 WARN_ON(tp->copied_seq != tp->rcv_nxt &&
1559 !(flags & (MSG_PEEK | MSG_TRUNC)));
			/* Processing order matters here: packets in flight,
			 * then backlog, then prequeue, then receive_queue,
			 * and each queue can only be processed when the
			 * later ones are empty.  receive_queue is empty at
			 * this point, but the prequeue can refill on a later
			 * iteration (backlog processing may add to
			 * receive_queue), and we must not release_sock()
			 * while the backlog still holds packets that arrived
			 * after the prequeued ones.  So drain the prequeue
			 * before releasing the socket, otherwise ordering
			 * breaks on the second iteration.
			 */
1587 if (!skb_queue_empty(&tp->ucopy.prequeue))
1588 goto do_prequeue;
1589
1590
1591 }
1592
1593#ifdef CONFIG_NET_DMA
1594 if (tp->ucopy.dma_chan)
1595 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1596#endif
1597 if (copied >= target) {
			/* Do not sleep, just process backlog. */
1599 release_sock(sk);
1600 lock_sock(sk);
1601 } else
1602 sk_wait_data(sk, &timeo);
1603
1604#ifdef CONFIG_NET_DMA
1605 tcp_service_net_dma(sk, false);
1606 tp->ucopy.wakeup = 0;
1607#endif
1608
1609 if (user_recv) {
1610 int chunk;
1611
1612
1613
1614 if ((chunk = len - tp->ucopy.len) != 0) {
1615 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
1616 len -= chunk;
1617 copied += chunk;
1618 }
1619
1620 if (tp->rcv_nxt == tp->copied_seq &&
1621 !skb_queue_empty(&tp->ucopy.prequeue)) {
1622do_prequeue:
1623 tcp_prequeue_process(sk);
1624
1625 if ((chunk = len - tp->ucopy.len) != 0) {
1626 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1627 len -= chunk;
1628 copied += chunk;
1629 }
1630 }
1631 }
1632 if ((flags & MSG_PEEK) &&
1633 (peek_seq - copied - urg_hole != tp->copied_seq)) {
1634 if (net_ratelimit())
1635 printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
1636 current->comm, task_pid_nr(current));
1637 peek_seq = tp->copied_seq;
1638 }
1639 continue;
1640
1641 found_ok_skb:
1642
1643 used = skb->len - offset;
1644 if (len < used)
1645 used = len;
1646
1647
1648 if (tp->urg_data) {
1649 u32 urg_offset = tp->urg_seq - *seq;
1650 if (urg_offset < used) {
1651 if (!urg_offset) {
1652 if (!sock_flag(sk, SOCK_URGINLINE)) {
1653 ++*seq;
1654 urg_hole++;
1655 offset++;
1656 used--;
1657 if (!used)
1658 goto skip_copy;
1659 }
1660 } else
1661 used = urg_offset;
1662 }
1663 }
1664
1665 if (!(flags & MSG_TRUNC)) {
1666#ifdef CONFIG_NET_DMA
1667 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1668 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1669
1670 if (tp->ucopy.dma_chan) {
1671 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
1672 tp->ucopy.dma_chan, skb, offset,
1673 msg->msg_iov, used,
1674 tp->ucopy.pinned_list);
1675
1676 if (tp->ucopy.dma_cookie < 0) {
1677
1678 printk(KERN_ALERT "dma_cookie < 0\n");
1679
1680
1681 if (!copied)
1682 copied = -EFAULT;
1683 break;
1684 }
1685
1686 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
1687
1688 if ((offset + used) == skb->len)
1689 copied_early = 1;
1690
1691 } else
1692#endif
1693 {
1694 err = skb_copy_datagram_iovec(skb, offset,
1695 msg->msg_iov, used);
1696 if (err) {
1697
1698 if (!copied)
1699 copied = -EFAULT;
1700 break;
1701 }
1702 }
1703 }
1704
1705 *seq += used;
1706 copied += used;
1707 len -= used;
1708
1709 tcp_rcv_space_adjust(sk);
1710
1711skip_copy:
1712 if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
1713 tp->urg_data = 0;
1714 tcp_fast_path_check(sk);
1715 }
1716 if (used + offset < skb->len)
1717 continue;
1718
1719 if (tcp_hdr(skb)->fin)
1720 goto found_fin_ok;
1721 if (!(flags & MSG_PEEK)) {
1722 sk_eat_skb(sk, skb, copied_early);
1723 copied_early = 0;
1724 }
1725 continue;
1726
1727 found_fin_ok:
		/* Process the FIN. */
1729 ++*seq;
1730 if (!(flags & MSG_PEEK)) {
1731 sk_eat_skb(sk, skb, copied_early);
1732 copied_early = 0;
1733 }
1734 break;
1735 } while (len > 0);
1736
1737 if (user_recv) {
1738 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
1739 int chunk;
1740
1741 tp->ucopy.len = copied > 0 ? len : 0;
1742
1743 tcp_prequeue_process(sk);
1744
1745 if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
1746 NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
1747 len -= chunk;
1748 copied += chunk;
1749 }
1750 }
1751
1752 tp->ucopy.task = NULL;
1753 tp->ucopy.len = 0;
1754 }
1755
1756#ifdef CONFIG_NET_DMA
1757 tcp_service_net_dma(sk, true);
1758 tp->ucopy.dma_chan = NULL;
1759
1760 if (tp->ucopy.pinned_list) {
1761 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
1762 tp->ucopy.pinned_list = NULL;
1763 }
1764#endif
1765
	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
1771 tcp_cleanup_rbuf(sk, copied);
1772
1773 release_sock(sk);
1774 return copied;
1775
1776out:
1777 release_sock(sk);
1778 return err;
1779
1780recv_urg:
1781 err = tcp_recv_urg(sk, msg, len, flags);
1782 goto out;
1783}
1784EXPORT_SYMBOL(tcp_recvmsg);
1785
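/* Switch the socket to a new TCP state, keeping the MIB counters
 * (CurrEstab, EstabResets) in sync and unhashing the socket when it
 * moves to TCP_CLOSE.
 */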
1786void tcp_set_state(struct sock *sk, int state)
1787{
1788 int oldstate = sk->sk_state;
1789
1790 switch (state) {
1791 case TCP_ESTABLISHED:
1792 if (oldstate != TCP_ESTABLISHED)
1793 TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
1794 break;
1795
1796 case TCP_CLOSE:
1797 if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
1798 TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
1799
1800 sk->sk_prot->unhash(sk);
1801 if (inet_csk(sk)->icsk_bind_hash &&
1802 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
1803 inet_put_port(sk);
		/* fall through */
1805 default:
1806 if (oldstate == TCP_ESTABLISHED)
1807 TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
1808 }
1809
	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
1813 sk->sk_state = state;
1814
1815#ifdef STATE_TRACE
1816 SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
1817#endif
1818}
1819EXPORT_SYMBOL_GPL(tcp_set_state);
1820
/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some
 *	states. A shutdown() may have already sent the FIN, or we may be
 *	closed, we send out a FIN.
 */

static const unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};
1843
1844static int tcp_close_state(struct sock *sk)
1845{
1846 int next = (int)new_state[sk->sk_state];
1847 int ns = next & TCP_STATE_MASK;
1848
1849 tcp_set_state(sk, ns);
1850
1851 return next & TCP_ACTION_FIN;
1852}
1853
/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD).
 */

1859void tcp_shutdown(struct sock *sk, int how)
1860{
1861
1862
1863
1864
1865 if (!(how & SEND_SHUTDOWN))
1866 return;
1867
	/* If we've already sent a FIN, or it's a closed state, skip this. */
1869 if ((1 << sk->sk_state) &
1870 (TCPF_ESTABLISHED | TCPF_SYN_SENT |
1871 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
1873 if (tcp_close_state(sk))
1874 tcp_send_fin(sk);
1875 }
1876}
1877EXPORT_SYMBOL(tcp_shutdown);
1878
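/* Returns true if we should abort: either too many orphaned sockets
 * or TCP is globally out of socket memory.
 */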
1879bool tcp_check_oom(struct sock *sk, int shift)
1880{
1881 bool too_many_orphans, out_of_socket_memory;
1882
1883 too_many_orphans = tcp_too_many_orphans(sk, shift);
1884 out_of_socket_memory = tcp_out_of_memory(sk);
1885
1886 if (too_many_orphans && net_ratelimit())
1887 pr_info("TCP: too many orphaned sockets\n");
1888 if (out_of_socket_memory && net_ratelimit())
1889 pr_info("TCP: out of memory -- consider tuning tcp_mem\n");
1890 return too_many_orphans || out_of_socket_memory;
1891}
1892
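/* Close a TCP socket: toss any unread data (sending a RST if there was
 * any), send a FIN when appropriate, and either hand the socket over
 * to the FIN_WAIT2/TIME_WAIT machinery or destroy it.
 */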
1893void tcp_close(struct sock *sk, long timeout)
1894{
1895 struct sk_buff *skb;
1896 int data_was_unread = 0;
1897 int state;
1898
1899 lock_sock(sk);
1900 sk->sk_shutdown = SHUTDOWN_MASK;
1901
1902 if (sk->sk_state == TCP_LISTEN) {
1903 tcp_set_state(sk, TCP_CLOSE);
1904
1905
1906 inet_csk_listen_stop(sk);
1907
1908 goto adjudge_to_death;
1909 }
1910
	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
1915 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1916 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
1917 tcp_hdr(skb)->fin;
1918 data_was_unread += len;
1919 __kfree_skb(skb);
1920 }
1921
1922 sk_mem_reclaim(sk);
1923
	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
1925 if (sk->sk_state == TCP_CLOSE)
1926 goto adjudge_to_death;
1927
	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
1935 if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
1937 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
1938 tcp_set_state(sk, TCP_CLOSE);
1939 tcp_send_active_reset(sk, sk->sk_allocation);
1940 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
1942 sk->sk_prot->disconnect(sk, 0);
1943 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
1944 } else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 *
		 * RED-PEN. Formally speaking, we have broken the TCP state
		 * machine here: ESTABLISHED -> FIN_WAIT1 and CLOSE_WAIT ->
		 * LAST_ACK are only legal when the FIN has actually been
		 * sent (i.e. is in window) rather than queued out of
		 * window.  The visible consequences are harmless: we may
		 * enter time-wait when it is not really required, and we
		 * may skip an active reset where the spec would want one.
		 */
1970 tcp_send_fin(sk);
1971 }
1972
1973 sk_stream_wait_close(sk, timeout);
1974
1975adjudge_to_death:
1976 state = sk->sk_state;
1977 sock_hold(sk);
1978 sock_orphan(sk);
1979
	/* It is the last release_sock in its life. It will remove backlog. */
1981 release_sock(sk);
1982
	/* Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
1987 local_bh_disable();
1988 bh_lock_sock(sk);
1989 WARN_ON(sock_owned_by_user(sk));
1990
1991 percpu_counter_inc(sk->sk_prot->orphan_count);
1992
	/* Have we already been destroyed by a softirq or backlog? */
1994 if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
1995 goto out;
1996
	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could keep
	 *	a socket open forever with no application left this end.
	 *	We use a 1 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */
2011 if (sk->sk_state == TCP_FIN_WAIT2) {
2012 struct tcp_sock *tp = tcp_sk(sk);
2013 if (tp->linger2 < 0) {
2014 tcp_set_state(sk, TCP_CLOSE);
2015 tcp_send_active_reset(sk, GFP_ATOMIC);
2016 NET_INC_STATS_BH(sock_net(sk),
2017 LINUX_MIB_TCPABORTONLINGER);
2018 } else {
2019 const int tmo = tcp_fin_time(sk);
2020
2021 if (tmo > TCP_TIMEWAIT_LEN) {
2022 inet_csk_reset_keepalive_timer(sk,
2023 tmo - TCP_TIMEWAIT_LEN);
2024 } else {
2025 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
2026 goto out;
2027 }
2028 }
2029 }
2030 if (sk->sk_state != TCP_CLOSE) {
2031 sk_mem_reclaim(sk);
2032 if (tcp_check_oom(sk, 0)) {
2033 tcp_set_state(sk, TCP_CLOSE);
2034 tcp_send_active_reset(sk, GFP_ATOMIC);
2035 NET_INC_STATS_BH(sock_net(sk),
2036 LINUX_MIB_TCPABORTONMEMORY);
2037 }
2038 }
2039
2040 if (sk->sk_state == TCP_CLOSE)
2041 inet_csk_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */
2043
2044out:
2045 bh_unlock_sock(sk);
2046 local_bh_enable();
2047 sock_put(sk);
2048}
2049EXPORT_SYMBOL(tcp_close);
2050
/* These states need RST on ABORT according to RFC793 */
2052
2053static inline int tcp_need_reset(int state)
2054{
2055 return (1 << state) &
2056 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
2057 TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
2058}
2059
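/* Abort the connection (sending a RST where RFC 793 requires one) and
 * return the socket to a clean, unconnected state so it can be reused.
 */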
2060int tcp_disconnect(struct sock *sk, int flags)
2061{
2062 struct inet_sock *inet = inet_sk(sk);
2063 struct inet_connection_sock *icsk = inet_csk(sk);
2064 struct tcp_sock *tp = tcp_sk(sk);
2065 int err = 0;
2066 int old_state = sk->sk_state;
2067
2068 if (old_state != TCP_CLOSE)
2069 tcp_set_state(sk, TCP_CLOSE);
2070
	/* ABORT function of RFC793 */
2072 if (old_state == TCP_LISTEN) {
2073 inet_csk_listen_stop(sk);
2074 } else if (tcp_need_reset(old_state) ||
2075 (tp->snd_nxt != tp->write_seq &&
2076 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
2080 tcp_send_active_reset(sk, gfp_any());
2081 sk->sk_err = ECONNRESET;
2082 } else if (old_state == TCP_SYN_SENT)
2083 sk->sk_err = ECONNRESET;
2084
2085 tcp_clear_xmit_timers(sk);
2086 __skb_queue_purge(&sk->sk_receive_queue);
2087 tcp_write_queue_purge(sk);
2088 __skb_queue_purge(&tp->out_of_order_queue);
2089#ifdef CONFIG_NET_DMA
2090 __skb_queue_purge(&sk->sk_async_wait_queue);
2091#endif
2092
2093 inet->inet_dport = 0;
2094
2095 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
2096 inet_reset_saddr(sk);
2097
2098 sk->sk_shutdown = 0;
2099 sock_reset_flag(sk, SOCK_DONE);
2100 tp->srtt = 0;
2101 if ((tp->write_seq += tp->max_window + 2) == 0)
2102 tp->write_seq = 1;
2103 icsk->icsk_backoff = 0;
2104 tp->snd_cwnd = 2;
2105 icsk->icsk_probes_out = 0;
2106 tp->packets_out = 0;
2107 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
2108 tp->snd_cwnd_cnt = 0;
2109 tp->bytes_acked = 0;
2110 tp->window_clamp = 0;
2111 tcp_set_ca_state(sk, TCP_CA_Open);
2112 tcp_clear_retrans(tp);
2113 inet_csk_delack_init(sk);
2114 tcp_init_send_head(sk);
2115 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
2116 __sk_dst_reset(sk);
2117
2118 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
2119
2120 sk->sk_error_report(sk);
2121 return err;
2122}
2123EXPORT_SYMBOL(tcp_disconnect);
2124
/*
 *	Socket option code for TCP.
 */
2128static int do_tcp_setsockopt(struct sock *sk, int level,
2129 int optname, char __user *optval, unsigned int optlen)
2130{
2131 struct tcp_sock *tp = tcp_sk(sk);
2132 struct inet_connection_sock *icsk = inet_csk(sk);
2133 int val;
2134 int err = 0;
2135
	/* These are data/string values, all the others are ints */
2137 switch (optname) {
2138 case TCP_CONGESTION: {
2139 char name[TCP_CA_NAME_MAX];
2140
2141 if (optlen < 1)
2142 return -EINVAL;
2143
2144 val = strncpy_from_user(name, optval,
2145 min_t(long, TCP_CA_NAME_MAX-1, optlen));
2146 if (val < 0)
2147 return -EFAULT;
2148 name[val] = 0;
2149
2150 lock_sock(sk);
2151 err = tcp_set_congestion_control(sk, name);
2152 release_sock(sk);
2153 return err;
2154 }
2155 case TCP_COOKIE_TRANSACTIONS: {
2156 struct tcp_cookie_transactions ctd;
2157 struct tcp_cookie_values *cvp = NULL;
2158
2159 if (sizeof(ctd) > optlen)
2160 return -EINVAL;
2161 if (copy_from_user(&ctd, optval, sizeof(ctd)))
2162 return -EFAULT;
2163
2164 if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
2165 ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
2166 return -EINVAL;
2167
2168 if (ctd.tcpct_cookie_desired == 0) {
2169
2170 } else if ((0x1 & ctd.tcpct_cookie_desired) ||
2171 ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
2172 ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
2173 return -EINVAL;
2174 }
2175
2176 if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
2177
2178 lock_sock(sk);
2179 if (tp->cookie_values != NULL) {
2180 kref_put(&tp->cookie_values->kref,
2181 tcp_cookie_values_release);
2182 tp->cookie_values = NULL;
2183 }
2184 tp->rx_opt.cookie_in_always = 0;
2185 tp->rx_opt.cookie_out_never = 1;
2186 release_sock(sk);
2187 return err;
2188 }
2189
2190
2191
2192 if (ctd.tcpct_used > 0 ||
2193 (tp->cookie_values == NULL &&
2194 (sysctl_tcp_cookie_size > 0 ||
2195 ctd.tcpct_cookie_desired > 0 ||
2196 ctd.tcpct_s_data_desired > 0))) {
2197 cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
2198 GFP_KERNEL);
2199 if (cvp == NULL)
2200 return -ENOMEM;
2201
2202 kref_init(&cvp->kref);
2203 }
2204 lock_sock(sk);
2205 tp->rx_opt.cookie_in_always =
2206 (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
2207 tp->rx_opt.cookie_out_never = 0;
2208
2209 if (tp->cookie_values != NULL) {
2210 if (cvp != NULL) {
2211
2212
2213
2214
2215 kref_put(&tp->cookie_values->kref,
2216 tcp_cookie_values_release);
2217 } else {
2218 cvp = tp->cookie_values;
2219 }
2220 }
2221
2222 if (cvp != NULL) {
2223 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2224
2225 if (ctd.tcpct_used > 0) {
2226 memcpy(cvp->s_data_payload, ctd.tcpct_value,
2227 ctd.tcpct_used);
2228 cvp->s_data_desired = ctd.tcpct_used;
2229 cvp->s_data_constant = 1;
2230 } else {
2231
2232 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2233 cvp->s_data_constant = 0;
2234 }
2235
2236 tp->cookie_values = cvp;
2237 }
2238 release_sock(sk);
2239 return err;
2240 }
2241 default:
2242
2243 break;
2244 }
2245
2246 if (optlen < sizeof(int))
2247 return -EINVAL;
2248
2249 if (get_user(val, (int __user *)optval))
2250 return -EFAULT;
2251
2252 lock_sock(sk);
2253
2254 switch (optname) {
2255 case TCP_MAXSEG:
		/* The user MSS hint is only range-checked here: it must lie
		 * between TCP_MIN_MSS and MAX_TCP_WINDOW.  Values larger than
		 * the interface MTU simply have no effect on the wire.
		 */
2259 if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) {
2260 err = -EINVAL;
2261 break;
2262 }
2263 tp->rx_opt.user_mss = val;
2264 break;
2265
2266 case TCP_NODELAY:
2267 if (val) {
			/* TCP_NODELAY is weaker than TCP_CORK, so that
			 * this option on corked socket is remembered, but
			 * it is not activated until cork is cleared.
			 *
			 * However, when TCP_NODELAY is set we make
			 * an explicit push, which overrides even TCP_CORK
			 * for currently queued segments.
			 */
2276 tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
2277 tcp_push_pending_frames(sk);
2278 } else {
2279 tp->nonagle &= ~TCP_NAGLE_OFF;
2280 }
2281 break;
2282
2283 case TCP_THIN_LINEAR_TIMEOUTS:
2284 if (val < 0 || val > 1)
2285 err = -EINVAL;
2286 else
2287 tp->thin_lto = val;
2288 break;
2289
2290 case TCP_THIN_DUPACK:
2291 if (val < 0 || val > 1)
2292 err = -EINVAL;
2293 else
2294 tp->thin_dupack = val;
2295 break;
2296
2297 case TCP_CORK:
		/* When set indicates to always queue non-full frames.
		 * Later the user clears this option and we transmit
		 * any pending partial frames in the queue.  This is
		 * meant to be used alongside sendfile() to get properly
		 * filled frames when the user (for example) must write
		 * out headers with a write() call first and then use
		 * sendfile to send out the data parts.
		 *
		 * TCP_CORK can be set together with TCP_NODELAY and it is
		 * stronger than TCP_NODELAY.
		 */
2309 if (val) {
2310 tp->nonagle |= TCP_NAGLE_CORK;
2311 } else {
2312 tp->nonagle &= ~TCP_NAGLE_CORK;
2313 if (tp->nonagle&TCP_NAGLE_OFF)
2314 tp->nonagle |= TCP_NAGLE_PUSH;
2315 tcp_push_pending_frames(sk);
2316 }
2317 break;
2318
2319 case TCP_KEEPIDLE:
2320 if (val < 1 || val > MAX_TCP_KEEPIDLE)
2321 err = -EINVAL;
2322 else {
2323 tp->keepalive_time = val * HZ;
2324 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2325 !((1 << sk->sk_state) &
2326 (TCPF_CLOSE | TCPF_LISTEN))) {
2327 u32 elapsed = keepalive_time_elapsed(tp);
2328 if (tp->keepalive_time > elapsed)
2329 elapsed = tp->keepalive_time - elapsed;
2330 else
2331 elapsed = 0;
2332 inet_csk_reset_keepalive_timer(sk, elapsed);
2333 }
2334 }
2335 break;
2336 case TCP_KEEPINTVL:
2337 if (val < 1 || val > MAX_TCP_KEEPINTVL)
2338 err = -EINVAL;
2339 else
2340 tp->keepalive_intvl = val * HZ;
2341 break;
2342 case TCP_KEEPCNT:
2343 if (val < 1 || val > MAX_TCP_KEEPCNT)
2344 err = -EINVAL;
2345 else
2346 tp->keepalive_probes = val;
2347 break;
2348 case TCP_SYNCNT:
2349 if (val < 1 || val > MAX_TCP_SYNCNT)
2350 err = -EINVAL;
2351 else
2352 icsk->icsk_syn_retries = val;
2353 break;
2354
2355 case TCP_LINGER2:
2356 if (val < 0)
2357 tp->linger2 = -1;
2358 else if (val > sysctl_tcp_fin_timeout / HZ)
2359 tp->linger2 = 0;
2360 else
2361 tp->linger2 = val * HZ;
2362 break;
2363
2364 case TCP_DEFER_ACCEPT:
		/* Translate value in seconds to number of retransmits */
2366 icsk->icsk_accept_queue.rskq_defer_accept =
2367 secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
2368 TCP_RTO_MAX / HZ);
2369 break;
2370
2371 case TCP_WINDOW_CLAMP:
2372 if (!val) {
2373 if (sk->sk_state != TCP_CLOSE) {
2374 err = -EINVAL;
2375 break;
2376 }
2377 tp->window_clamp = 0;
2378 } else
2379 tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
2380 SOCK_MIN_RCVBUF / 2 : val;
2381 break;
2382
2383 case TCP_QUICKACK:
2384 if (!val) {
2385 icsk->icsk_ack.pingpong = 1;
2386 } else {
2387 icsk->icsk_ack.pingpong = 0;
2388 if ((1 << sk->sk_state) &
2389 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
2390 inet_csk_ack_scheduled(sk)) {
2391 icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
2392 tcp_cleanup_rbuf(sk, 1);
2393 if (!(val & 1))
2394 icsk->icsk_ack.pingpong = 1;
2395 }
2396 }
2397 break;
2398
2399#ifdef CONFIG_TCP_MD5SIG
2400 case TCP_MD5SIG:
		/* Read the IP->Key mappings from userspace */
2402 err = tp->af_specific->md5_parse(sk, optval, optlen);
2403 break;
2404#endif
2405 case TCP_USER_TIMEOUT:
		/* Cap the max time in ms TCP will retry/retrans
		 * before giving up and aborting (ETIMEDOUT) a connection.
		 */
2409 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2410 break;
2411 default:
2412 err = -ENOPROTOOPT;
2413 break;
2414 }
2415
2416 release_sock(sk);
2417 return err;
2418}
2419
2420int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2421 unsigned int optlen)
2422{
2423 const struct inet_connection_sock *icsk = inet_csk(sk);
2424
2425 if (level != SOL_TCP)
2426 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
2427 optval, optlen);
2428 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2429}
2430EXPORT_SYMBOL(tcp_setsockopt);
2431
2432#ifdef CONFIG_COMPAT
2433int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2434 char __user *optval, unsigned int optlen)
2435{
2436 if (level != SOL_TCP)
2437 return inet_csk_compat_setsockopt(sk, level, optname,
2438 optval, optlen);
2439 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2440}
2441EXPORT_SYMBOL(compat_tcp_setsockopt);
2442#endif
2443
/* Return information about state of tcp endpoint in API format. */
2445void tcp_get_info(const struct sock *sk, struct tcp_info *info)
2446{
2447 const struct tcp_sock *tp = tcp_sk(sk);
2448 const struct inet_connection_sock *icsk = inet_csk(sk);
2449 u32 now = tcp_time_stamp;
2450
2451 memset(info, 0, sizeof(*info));
2452
2453 info->tcpi_state = sk->sk_state;
2454 info->tcpi_ca_state = icsk->icsk_ca_state;
2455 info->tcpi_retransmits = icsk->icsk_retransmits;
2456 info->tcpi_probes = icsk->icsk_probes_out;
2457 info->tcpi_backoff = icsk->icsk_backoff;
2458
2459 if (tp->rx_opt.tstamp_ok)
2460 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
2461 if (tcp_is_sack(tp))
2462 info->tcpi_options |= TCPI_OPT_SACK;
2463 if (tp->rx_opt.wscale_ok) {
2464 info->tcpi_options |= TCPI_OPT_WSCALE;
2465 info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
2466 info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
2467 }
2468
2469 if (tp->ecn_flags & TCP_ECN_OK)
2470 info->tcpi_options |= TCPI_OPT_ECN;
2471 if (tp->ecn_flags & TCP_ECN_SEEN)
2472 info->tcpi_options |= TCPI_OPT_ECN_SEEN;
2473
2474 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
2475 info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
2476 info->tcpi_snd_mss = tp->mss_cache;
2477 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
2478
2479 if (sk->sk_state == TCP_LISTEN) {
2480 info->tcpi_unacked = sk->sk_ack_backlog;
2481 info->tcpi_sacked = sk->sk_max_ack_backlog;
2482 } else {
2483 info->tcpi_unacked = tp->packets_out;
2484 info->tcpi_sacked = tp->sacked_out;
2485 }
2486 info->tcpi_lost = tp->lost_out;
2487 info->tcpi_retrans = tp->retrans_out;
2488 info->tcpi_fackets = tp->fackets_out;
2489
2490 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
2491 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
2492 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
2493
2494 info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
2495 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
2496 info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
2497 info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
2498 info->tcpi_snd_ssthresh = tp->snd_ssthresh;
2499 info->tcpi_snd_cwnd = tp->snd_cwnd;
2500 info->tcpi_advmss = tp->advmss;
2501 info->tcpi_reordering = tp->reordering;
2502
2503 info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
2504 info->tcpi_rcv_space = tp->rcvq_space.space;
2505
2506 info->tcpi_total_retrans = tp->total_retrans;
2507}
2508EXPORT_SYMBOL_GPL(tcp_get_info);
2509
2510static int do_tcp_getsockopt(struct sock *sk, int level,
2511 int optname, char __user *optval, int __user *optlen)
2512{
2513 struct inet_connection_sock *icsk = inet_csk(sk);
2514 struct tcp_sock *tp = tcp_sk(sk);
2515 int val, len;
2516
2517 if (get_user(len, optlen))
2518 return -EFAULT;
2519
2520 len = min_t(unsigned int, len, sizeof(int));
2521
2522 if (len < 0)
2523 return -EINVAL;
2524
2525 switch (optname) {
2526 case TCP_MAXSEG:
2527 val = tp->mss_cache;
2528 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2529 val = tp->rx_opt.user_mss;
2530 break;
2531 case TCP_NODELAY:
2532 val = !!(tp->nonagle&TCP_NAGLE_OFF);
2533 break;
2534 case TCP_CORK:
2535 val = !!(tp->nonagle&TCP_NAGLE_CORK);
2536 break;
2537 case TCP_KEEPIDLE:
2538 val = keepalive_time_when(tp) / HZ;
2539 break;
2540 case TCP_KEEPINTVL:
2541 val = keepalive_intvl_when(tp) / HZ;
2542 break;
2543 case TCP_KEEPCNT:
2544 val = keepalive_probes(tp);
2545 break;
2546 case TCP_SYNCNT:
2547 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
2548 break;
2549 case TCP_LINGER2:
2550 val = tp->linger2;
2551 if (val >= 0)
2552 val = (val ? : sysctl_tcp_fin_timeout) / HZ;
2553 break;
2554 case TCP_DEFER_ACCEPT:
2555 val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
2556 TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
2557 break;
2558 case TCP_WINDOW_CLAMP:
2559 val = tp->window_clamp;
2560 break;
2561 case TCP_INFO: {
2562 struct tcp_info info;
2563
2564 if (get_user(len, optlen))
2565 return -EFAULT;
2566
2567 tcp_get_info(sk, &info);
2568
2569 len = min_t(unsigned int, len, sizeof(info));
2570 if (put_user(len, optlen))
2571 return -EFAULT;
2572 if (copy_to_user(optval, &info, len))
2573 return -EFAULT;
2574 return 0;
2575 }
2576 case TCP_QUICKACK:
2577 val = !icsk->icsk_ack.pingpong;
2578 break;
2579
2580 case TCP_CONGESTION:
2581 if (get_user(len, optlen))
2582 return -EFAULT;
2583 len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
2584 if (put_user(len, optlen))
2585 return -EFAULT;
2586 if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
2587 return -EFAULT;
2588 return 0;
2589
2590 case TCP_COOKIE_TRANSACTIONS: {
2591 struct tcp_cookie_transactions ctd;
2592 struct tcp_cookie_values *cvp = tp->cookie_values;
2593
2594 if (get_user(len, optlen))
2595 return -EFAULT;
2596 if (len < sizeof(ctd))
2597 return -EINVAL;
2598
2599 memset(&ctd, 0, sizeof(ctd));
2600 ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
2601 TCP_COOKIE_IN_ALWAYS : 0)
2602 | (tp->rx_opt.cookie_out_never ?
2603 TCP_COOKIE_OUT_NEVER : 0);
2604
2605 if (cvp != NULL) {
2606 ctd.tcpct_flags |= (cvp->s_data_in ?
2607 TCP_S_DATA_IN : 0)
2608 | (cvp->s_data_out ?
2609 TCP_S_DATA_OUT : 0);
2610
2611 ctd.tcpct_cookie_desired = cvp->cookie_desired;
2612 ctd.tcpct_s_data_desired = cvp->s_data_desired;
2613
2614 memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
2615 cvp->cookie_pair_size);
2616 ctd.tcpct_used = cvp->cookie_pair_size;
2617 }
2618
2619 if (put_user(sizeof(ctd), optlen))
2620 return -EFAULT;
2621 if (copy_to_user(optval, &ctd, sizeof(ctd)))
2622 return -EFAULT;
2623 return 0;
2624 }
2625 case TCP_THIN_LINEAR_TIMEOUTS:
2626 val = tp->thin_lto;
2627 break;
2628 case TCP_THIN_DUPACK:
2629 val = tp->thin_dupack;
2630 break;
2631
2632 case TCP_USER_TIMEOUT:
2633 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2634 break;
2635 default:
2636 return -ENOPROTOOPT;
2637 }
2638
2639 if (put_user(len, optlen))
2640 return -EFAULT;
2641 if (copy_to_user(optval, &val, len))
2642 return -EFAULT;
2643 return 0;
2644}
2645
2646int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2647 int __user *optlen)
2648{
2649 struct inet_connection_sock *icsk = inet_csk(sk);
2650
2651 if (level != SOL_TCP)
2652 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
2653 optval, optlen);
2654 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2655}
2656EXPORT_SYMBOL(tcp_getsockopt);
2657
2658#ifdef CONFIG_COMPAT
2659int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2660 char __user *optval, int __user *optlen)
2661{
2662 if (level != SOL_TCP)
2663 return inet_csk_compat_getsockopt(sk, level, optname,
2664 optval, optlen);
2665 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2666}
2667EXPORT_SYMBOL(compat_tcp_getsockopt);
2668#endif
2669
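/* GSO callback: split a large TCP skb into MSS-sized segments, fixing
 * up sequence numbers, flags and checksums on each piece.
 */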
2670struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
2671 netdev_features_t features)
2672{
2673 struct sk_buff *segs = ERR_PTR(-EINVAL);
2674 struct tcphdr *th;
2675 unsigned thlen;
2676 unsigned int seq;
2677 __be32 delta;
2678 unsigned int oldlen;
2679 unsigned int mss;
2680
2681 if (!pskb_may_pull(skb, sizeof(*th)))
2682 goto out;
2683
2684 th = tcp_hdr(skb);
2685 thlen = th->doff * 4;
2686 if (thlen < sizeof(*th))
2687 goto out;
2688
2689 if (!pskb_may_pull(skb, thlen))
2690 goto out;
2691
2692 oldlen = (u16)~skb->len;
2693 __skb_pull(skb, thlen);
2694
2695 mss = skb_shinfo(skb)->gso_size;
2696 if (unlikely(skb->len <= mss))
2697 goto out;
2698
2699 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet came in through a GSO-robust (untrusted) path:
		 * validate the gso_type and just fix up gso_segs; no
		 * software segmentation is needed here.
		 */
2701 int type = skb_shinfo(skb)->gso_type;
2702
2703 if (unlikely(type &
2704 ~(SKB_GSO_TCPV4 |
2705 SKB_GSO_DODGY |
2706 SKB_GSO_TCP_ECN |
2707 SKB_GSO_TCPV6 |
2708 0) ||
2709 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
2710 goto out;
2711
2712 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
2713
2714 segs = NULL;
2715 goto out;
2716 }
2717
2718 segs = skb_segment(skb, features);
2719 if (IS_ERR(segs))
2720 goto out;
2721
2722 delta = htonl(oldlen + (thlen + mss));
2723
2724 skb = segs;
2725 th = tcp_hdr(skb);
2726 seq = ntohl(th->seq);
2727
2728 do {
2729 th->fin = th->psh = 0;
2730
2731 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
2732 (__force u32)delta));
2733 if (skb->ip_summed != CHECKSUM_PARTIAL)
2734 th->check =
2735 csum_fold(csum_partial(skb_transport_header(skb),
2736 thlen, skb->csum));
2737
2738 seq += mss;
2739 skb = skb->next;
2740 th = tcp_hdr(skb);
2741
2742 th->seq = htonl(seq);
2743 th->cwr = 0;
2744 } while (skb->next);
2745
2746 delta = htonl(oldlen + (skb->tail - skb->transport_header) +
2747 skb->data_len);
2748 th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
2749 (__force u32)delta));
2750 if (skb->ip_summed != CHECKSUM_PARTIAL)
2751 th->check = csum_fold(csum_partial(skb_transport_header(skb),
2752 thlen, skb->csum));
2753
2754out:
2755 return segs;
2756}
2757EXPORT_SYMBOL(tcp_tso_segment);
2758
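/* GRO callback: try to merge an incoming TCP segment with an
 * already-held packet of the same flow; flush on any header difference
 * that would make coalescing unsafe.
 */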
2759struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2760{
2761 struct sk_buff **pp = NULL;
2762 struct sk_buff *p;
2763 struct tcphdr *th;
2764 struct tcphdr *th2;
2765 unsigned int len;
2766 unsigned int thlen;
2767 __be32 flags;
2768 unsigned int mss = 1;
2769 unsigned int hlen;
2770 unsigned int off;
2771 int flush = 1;
2772 int i;
2773
2774 off = skb_gro_offset(skb);
2775 hlen = off + sizeof(*th);
2776 th = skb_gro_header_fast(skb, off);
2777 if (skb_gro_header_hard(skb, hlen)) {
2778 th = skb_gro_header_slow(skb, hlen, off);
2779 if (unlikely(!th))
2780 goto out;
2781 }
2782
2783 thlen = th->doff * 4;
2784 if (thlen < sizeof(*th))
2785 goto out;
2786
2787 hlen = off + thlen;
2788 if (skb_gro_header_hard(skb, hlen)) {
2789 th = skb_gro_header_slow(skb, hlen, off);
2790 if (unlikely(!th))
2791 goto out;
2792 }
2793
2794 skb_gro_pull(skb, thlen);
2795
2796 len = skb_gro_len(skb);
2797 flags = tcp_flag_word(th);
2798
2799 for (; (p = *head); head = &p->next) {
2800 if (!NAPI_GRO_CB(p)->same_flow)
2801 continue;
2802
2803 th2 = tcp_hdr(p);
2804
2805 if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
2806 NAPI_GRO_CB(p)->same_flow = 0;
2807 continue;
2808 }
2809
2810 goto found;
2811 }
2812
2813 goto out_check_final;
2814
2815found:
2816 flush = NAPI_GRO_CB(p)->flush;
2817 flush |= (__force int)(flags & TCP_FLAG_CWR);
2818 flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
2819 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
2820 flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
2821 for (i = sizeof(*th); i < thlen; i += 4)
2822 flush |= *(u32 *)((u8 *)th + i) ^
2823 *(u32 *)((u8 *)th2 + i);
2824
2825 mss = skb_shinfo(p)->gso_size;
2826
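	/* Only an in-sequence segment no larger than the flow's MSS can be
	 * merged onto the held packet.
	 */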
2827 flush |= (len - 1) >= mss;
2828 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
2829
2830 if (flush || skb_gro_receive(head, skb)) {
2831 mss = 1;
2832 goto out_check_final;
2833 }
2834
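	/* Merge succeeded: propagate FIN/PSH from the new segment into the
	 * header of the held packet.
	 */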
2835 p = *head;
2836 th2 = tcp_hdr(p);
2837 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
2838
2839out_check_final:
2840 flush = len < mss;
2841 flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
2842 TCP_FLAG_RST | TCP_FLAG_SYN |
2843 TCP_FLAG_FIN));
2844
2845 if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
2846 pp = head;
2847
2848out:
2849 NAPI_GRO_CB(skb)->flush |= flush;
2850
2851 return pp;
2852}
2853EXPORT_SYMBOL(tcp_gro_receive);
2854
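/* Finish a GRO-merged packet: mark it for checksum offload and record the
 * number of merged segments so it can be re-segmented by GSO if necessary.
 */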
2855int tcp_gro_complete(struct sk_buff *skb)
2856{
2857 struct tcphdr *th = tcp_hdr(skb);
2858
2859 skb->csum_start = skb_transport_header(skb) - skb->head;
2860 skb->csum_offset = offsetof(struct tcphdr, check);
2861 skb->ip_summed = CHECKSUM_PARTIAL;
2862
2863 skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
2864
2865 if (th->cwr)
2866 skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
2867
2868 return 0;
2869}
2870EXPORT_SYMBOL(tcp_gro_complete);
2871
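/* TCP MD5 signature (RFC 2385) support: a reference-counted pool of per-cpu
 * MD5 transforms shared by all users.
 */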
2872#ifdef CONFIG_TCP_MD5SIG
2873static unsigned long tcp_md5sig_users;
2874static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
2875static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
2876
2877static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
2878{
2879 int cpu;
2880
2881 for_each_possible_cpu(cpu) {
2882 struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
2883
2884 if (p->md5_desc.tfm)
2885 crypto_free_hash(p->md5_desc.tfm);
2886 }
2887 free_percpu(pool);
2888}
2889
2890void tcp_free_md5sig_pool(void)
2891{
2892 struct tcp_md5sig_pool __percpu *pool = NULL;
2893
2894 spin_lock_bh(&tcp_md5sig_pool_lock);
2895 if (--tcp_md5sig_users == 0) {
2896 pool = tcp_md5sig_pool;
2897 tcp_md5sig_pool = NULL;
2898 }
2899 spin_unlock_bh(&tcp_md5sig_pool_lock);
2900 if (pool)
2901 __tcp_free_md5sig_pool(pool);
2902}
2903EXPORT_SYMBOL(tcp_free_md5sig_pool);
2904
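/* Allocate the per-cpu pool and an "md5" hash transform for every possible
 * CPU; on failure everything allocated so far is freed again.
 */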
2905static struct tcp_md5sig_pool __percpu *
2906__tcp_alloc_md5sig_pool(struct sock *sk)
2907{
2908 int cpu;
2909 struct tcp_md5sig_pool __percpu *pool;
2910
2911 pool = alloc_percpu(struct tcp_md5sig_pool);
2912 if (!pool)
2913 return NULL;
2914
2915 for_each_possible_cpu(cpu) {
2916 struct crypto_hash *hash;
2917
2918 hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
2919 if (!hash || IS_ERR(hash))
2920 goto out_free;
2921
2922 per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
2923 }
2924 return pool;
2925out_free:
2926 __tcp_free_md5sig_pool(pool);
2927 return NULL;
2928}
2929
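/* Take a reference on the shared MD5 pool, allocating it if we are the first
 * user; concurrent callers that find the pool not yet set up back off and
 * retry.
 */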
2930struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
2931{
2932 struct tcp_md5sig_pool __percpu *pool;
2933 int alloc = 0;
2934
2935retry:
2936 spin_lock_bh(&tcp_md5sig_pool_lock);
2937 pool = tcp_md5sig_pool;
2938 if (tcp_md5sig_users++ == 0) {
2939 alloc = 1;
2940 spin_unlock_bh(&tcp_md5sig_pool_lock);
2941 } else if (!pool) {
2942 tcp_md5sig_users--;
2943 spin_unlock_bh(&tcp_md5sig_pool_lock);
2944 cpu_relax();
2945 goto retry;
2946 } else
2947 spin_unlock_bh(&tcp_md5sig_pool_lock);
2948
2949 if (alloc) {
2950
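		/* Allocate outside the spinlock: the allocation may sleep. */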
2951 struct tcp_md5sig_pool __percpu *p;
2952
2953 p = __tcp_alloc_md5sig_pool(sk);
2954 spin_lock_bh(&tcp_md5sig_pool_lock);
2955 if (!p) {
2956 tcp_md5sig_users--;
2957 spin_unlock_bh(&tcp_md5sig_pool_lock);
2958 return NULL;
2959 }
2960 pool = tcp_md5sig_pool;
2961 if (pool) {
2962
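			/* Another user installed a pool while we were
			 * allocating; keep theirs and free ours.
			 */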
2963 spin_unlock_bh(&tcp_md5sig_pool_lock);
2964 __tcp_free_md5sig_pool(p);
2965 } else {
2966 tcp_md5sig_pool = pool = p;
2967 spin_unlock_bh(&tcp_md5sig_pool_lock);
2968 }
2969 }
2970 return pool;
2971}
2972EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2973
/* tcp_get_md5sig_pool - get the md5sig_pool for this user
 *
 * The pool is a per-cpu structure, so on success we return with bottom
 * halves (and thus preemption) disabled, which keeps us on this CPU and
 * keeps softirq processing from using the same per-cpu entry until
 * tcp_put_md5sig_pool() is called.
 */
2982struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2983{
2984 struct tcp_md5sig_pool __percpu *p;
2985
2986 local_bh_disable();
2987
2988 spin_lock(&tcp_md5sig_pool_lock);
2989 p = tcp_md5sig_pool;
2990 if (p)
2991 tcp_md5sig_users++;
2992 spin_unlock(&tcp_md5sig_pool_lock);
2993
2994 if (p)
2995 return this_cpu_ptr(p);
2996
2997 local_bh_enable();
2998 return NULL;
2999}
3000EXPORT_SYMBOL(tcp_get_md5sig_pool);
3001
3002void tcp_put_md5sig_pool(void)
3003{
3004 local_bh_enable();
3005 tcp_free_md5sig_pool();
3006}
3007EXPORT_SYMBOL(tcp_put_md5sig_pool);
3008
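/* Feed the fixed TCP header (with the checksum field cleared) into the MD5
 * digest.
 */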
3009int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
3010 const struct tcphdr *th)
3011{
3012 struct scatterlist sg;
3013 struct tcphdr hdr;
3014 int err;
3015
3016
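	/* We are not allowed to modify the skb's tcphdr, so hash a local
	 * copy instead.
	 */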
3017 memcpy(&hdr, th, sizeof(hdr));
3018 hdr.check = 0;
3019
3020
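	/* Only the fixed header is hashed; RFC 2385 excludes TCP options
	 * from the signature.
	 */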
3021 sg_init_one(&sg, &hdr, sizeof(hdr));
3022 err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr));
3023 return err;
3024}
3025EXPORT_SYMBOL(tcp_md5_hash_header);
3026
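/* Hash the TCP payload of @skb into the MD5 digest, starting @header_len
 * bytes into the linear area and covering paged fragments as well as the
 * frag list.
 */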
3027int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
3028 const struct sk_buff *skb, unsigned int header_len)
3029{
3030 struct scatterlist sg;
3031 const struct tcphdr *tp = tcp_hdr(skb);
3032 struct hash_desc *desc = &hp->md5_desc;
3033 unsigned i;
3034 const unsigned head_data_len = skb_headlen(skb) > header_len ?
3035 skb_headlen(skb) - header_len : 0;
3036 const struct skb_shared_info *shi = skb_shinfo(skb);
3037 struct sk_buff *frag_iter;
3038
3039 sg_init_table(&sg, 1);
3040
3041 sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
3042 if (crypto_hash_update(desc, &sg, head_data_len))
3043 return 1;
3044
3045 for (i = 0; i < shi->nr_frags; ++i) {
3046 const struct skb_frag_struct *f = &shi->frags[i];
3047 struct page *page = skb_frag_page(f);
3048 sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
3049 if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
3050 return 1;
3051 }
3052
3053 skb_walk_frags(skb, frag_iter)
3054 if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
3055 return 1;
3056
3057 return 0;
3058}
3059EXPORT_SYMBOL(tcp_md5_hash_skb_data);
3060
3061int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
3062{
3063 struct scatterlist sg;
3064
3065 sg_init_one(&sg, key->key, key->keylen);
3066 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
3067}
3068EXPORT_SYMBOL(tcp_md5_hash_key);
3069
3070#endif
3071
/* TCP cookie transactions: each responder keeps up to two secret values
 * at once so that secrets can be rolled over without breaking cookies
 * that are still in flight.  A secret is in one of four states:
 *
 * Generating  (tcp_secret_generating != tcp_secret_primary)
 *	Used to generate new Responder-Cookies, but not yet used for
 *	primary verification.  A short-lived state.
 *
 * Primary     (tcp_secret_generating == tcp_secret_primary)
 *	Used both for generation and for primary verification.
 *
 * Retiring    (tcp_secret_retiring != tcp_secret_secondary)
 *	Still used for verification while the newer secret takes over.
 *
 * Secondary   (tcp_secret_retiring == tcp_secret_secondary)
 *	Used only for secondary verification after primary verification
 *	fails; discarded after at most 2MSL.
 */
3096struct tcp_cookie_secret {
	/* Random workspace used to generate and verify Responder-Cookies,
	 * valid until @expires (in jiffies).
	 */
3102 u32 secrets[COOKIE_WORKSPACE_WORDS];
3103 unsigned long expires;
3104};
3105
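/* Secret lifetimes, in jiffies: a freshly bootstrapped secret is rolled over
 * after roughly one MSL, an established one every 600 seconds, and the
 * retiring secret is kept for up to two MSLs.
 */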
3106#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
3107#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
3108#define TCP_SECRET_LIFE (HZ * 600)
3109
3110static struct tcp_cookie_secret tcp_secret_one;
3111static struct tcp_cookie_secret tcp_secret_two;
3112
3113
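/* Essentially a circular list over the two secrets above, without dynamic
 * allocation; runtime updates of these pointers happen under
 * tcp_secret_locker.
 */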
3114static struct tcp_cookie_secret *tcp_secret_generating;
3115static struct tcp_cookie_secret *tcp_secret_primary;
3116static struct tcp_cookie_secret *tcp_secret_retiring;
3117static struct tcp_cookie_secret *tcp_secret_secondary;
3118
3119static DEFINE_SPINLOCK(tcp_secret_locker);

/* Pick a pseudo-random word from the cookie workspace, using another
 * workspace word as the index.
 */
3123static inline u32 tcp_cookie_work(const u32 *ws, const int n)
3124{
3125 return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
3126}
3127

/* Fill bakery[COOKIE_WORKSPACE_WORDS] with the current generating secret,
 * rolling the secrets over once they have expired.
 */
3132int tcp_cookie_generator(u32 *bakery)
3133{
3134 unsigned long jiffy = jiffies;
3135
3136 if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
3137 spin_lock_bh(&tcp_secret_locker);
3138 if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
3139
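			/* Refreshed by another CPU while we waited for the
			 * lock; just copy the current secret.
			 */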
3140 memcpy(bakery,
3141 &tcp_secret_generating->secrets[0],
3142 COOKIE_WORKSPACE_WORDS);
3143 } else {
3144
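			/* Still needs refreshing: draw a brand new secret. */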
3145 get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);

			/* On the very first rollover after boot both expiry
			 * times are still equal (see tcp_init()), so mix
			 * some clock jitter into the fresh secret and arm
			 * only a short, 1MSL-based rollover in case
			 * early-boot randomness is weak.  Otherwise use the
			 * normal lifetimes.
			 */
3156 if (unlikely(tcp_secret_primary->expires ==
3157 tcp_secret_secondary->expires)) {
3158 struct timespec tv;
3159
3160 getnstimeofday(&tv);
3161 bakery[COOKIE_DIGEST_WORDS+0] ^=
3162 (u32)tv.tv_nsec;
3163
3164 tcp_secret_secondary->expires = jiffy
3165 + TCP_SECRET_1MSL
3166 + (0x0f & tcp_cookie_work(bakery, 0));
3167 } else {
3168 tcp_secret_secondary->expires = jiffy
3169 + TCP_SECRET_LIFE
3170 + (0xff & tcp_cookie_work(bakery, 1));
3171 tcp_secret_primary->expires = jiffy
3172 + TCP_SECRET_2MSL
3173 + (0x1f & tcp_cookie_work(bakery, 2));
3174 }
3175 memcpy(&tcp_secret_secondary->secrets[0],
3176 bakery, COOKIE_WORKSPACE_WORDS);
3177
3178 rcu_assign_pointer(tcp_secret_generating,
3179 tcp_secret_secondary);
3180 rcu_assign_pointer(tcp_secret_retiring,
3181 tcp_secret_primary);
3182
			/* Neither call_rcu() nor synchronize_rcu() is
			 * needed here: the retiring secret is never freed,
			 * only overwritten after further (locked) pointer
			 * updates and a quiet period of at least one MSL.
			 */
3188 }
3189 spin_unlock_bh(&tcp_secret_locker);
3190 } else {
3191 rcu_read_lock_bh();
3192 memcpy(bakery,
3193 &rcu_dereference(tcp_secret_generating)->secrets[0],
3194 COOKIE_WORKSPACE_WORDS);
3195 rcu_read_unlock_bh();
3196 }
3197 return 0;
3198}
3199EXPORT_SYMBOL(tcp_cookie_generator);
3200
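/* Move the socket to TCP_CLOSE, stop its timers and either wake up the owner
 * or, if the socket is already orphaned, destroy it immediately.
 */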
3201void tcp_done(struct sock *sk)
3202{
3203 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
3204 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
3205
3206 tcp_set_state(sk, TCP_CLOSE);
3207 tcp_clear_xmit_timers(sk);
3208
3209 sk->sk_shutdown = SHUTDOWN_MASK;
3210
3211 if (!sock_flag(sk, SOCK_DEAD))
3212 sk->sk_state_change(sk);
3213 else
3214 inet_csk_destroy_sock(sk);
3215}
3216EXPORT_SYMBOL_GPL(tcp_done);
3217
3218extern struct tcp_congestion_ops tcp_reno;
3219
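/* Allow the size of the established hash table to be overridden on the
 * kernel command line with thash_entries=.
 */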
3220static __initdata unsigned long thash_entries;
3221static int __init set_thash_entries(char *str)
3222{
3223 if (!str)
3224 return 0;
3225 thash_entries = simple_strtoul(str, &str, 0);
3226 return 1;
3227}
3228__setup("thash_entries=", set_thash_entries);
3229
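/* Scale the default tcp_mem[] thresholds from the number of free buffer
 * pages: memory pressure starts at roughly 1/8 of them, the low watermark at
 * 3/4 of that, and the hard limit at twice the low watermark.
 */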
3230void tcp_init_mem(struct net *net)
3231{
3232 unsigned long limit = nr_free_buffer_pages() / 8;
3233 limit = max(limit, 128UL);
3234 net->ipv4.sysctl_tcp_mem[0] = limit / 4 * 3;
3235 net->ipv4.sysctl_tcp_mem[1] = limit;
3236 net->ipv4.sysctl_tcp_mem[2] = net->ipv4.sysctl_tcp_mem[0] * 2;
3237}
3238
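/* Boot-time initialisation of TCP: hash tables, default memory and buffer
 * limits, the Reno congestion control module and the cookie secrets.
 */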
3239void __init tcp_init(void)
3240{
3241 struct sk_buff *skb = NULL;
3242 unsigned long limit;
3243 int max_share, cnt;
3244 unsigned int i;
3245 unsigned long jiffy = jiffies;
3246
3247 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
3248
3249 percpu_counter_init(&tcp_sockets_allocated, 0);
3250 percpu_counter_init(&tcp_orphan_count, 0);
3251 tcp_hashinfo.bind_bucket_cachep =
3252 kmem_cache_create("tcp_bind_bucket",
3253 sizeof(struct inet_bind_bucket), 0,
3254 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3255
	/* Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
3261 tcp_hashinfo.ehash =
3262 alloc_large_system_hash("TCP established",
3263 sizeof(struct inet_ehash_bucket),
3264 thash_entries,
3265 (totalram_pages >= 128 * 1024) ?
3266 13 : 15,
3267 0,
3268 NULL,
3269 &tcp_hashinfo.ehash_mask,
3270 thash_entries ? 0 : 512 * 1024);
3271 for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) {
3272 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i);
3273 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i);
3274 }
3275 if (inet_ehash_locks_alloc(&tcp_hashinfo))
3276 panic("TCP: failed to alloc ehash_locks");
3277 tcp_hashinfo.bhash =
3278 alloc_large_system_hash("TCP bind",
3279 sizeof(struct inet_bind_hashbucket),
3280 tcp_hashinfo.ehash_mask + 1,
3281 (totalram_pages >= 128 * 1024) ?
3282 13 : 15,
3283 0,
3284 &tcp_hashinfo.bhash_size,
3285 NULL,
3286 64 * 1024);
3287 tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
3288 for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
3289 spin_lock_init(&tcp_hashinfo.bhash[i].lock);
3290 INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
3291 }
3292
3293
3294 cnt = tcp_hashinfo.ehash_mask + 1;
3295
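	/* Derive the time-wait bucket, orphan and SYN backlog limits from the
	 * size of the established hash table.
	 */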
3296 tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
3297 sysctl_tcp_max_orphans = cnt / 2;
3298 sysctl_max_syn_backlog = max(128, cnt / 256);
3299
3300 tcp_init_mem(&init_net);
3301
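	/* Cap the default per-socket send/receive buffer limits by the
	 * available memory, but never above 4 MB.
	 */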
3302 limit = nr_free_buffer_pages() << (PAGE_SHIFT - 10);
3303 limit = max(limit, 128UL);
3304 max_share = min(4UL*1024*1024, limit);
3305
3306 sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
3307 sysctl_tcp_wmem[1] = 16*1024;
3308 sysctl_tcp_wmem[2] = max(64*1024, max_share);
3309
3310 sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
3311 sysctl_tcp_rmem[1] = 87380;
3312 sysctl_tcp_rmem[2] = max(87380, max_share);
3313
3314 printk(KERN_INFO "TCP: Hash tables configured "
3315 "(established %u bind %u)\n",
3316 tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
3317
3318 tcp_register_congestion_control(&tcp_reno);
3319
3320 memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
3321 memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
3322 tcp_secret_one.expires = jiffy;
3323 tcp_secret_two.expires = jiffy;
3324 tcp_secret_generating = &tcp_secret_one;
3325 tcp_secret_primary = &tcp_secret_one;
3326 tcp_secret_retiring = &tcp_secret_two;
3327 tcp_secret_secondary = &tcp_secret_two;
3328}
3329