1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/sched.h>
53#include <linux/mm.h>
54#include <linux/string.h>
55#include <linux/errno.h>
56#include <linux/highmem.h>
57
58#include <linux/socket.h>
59#include <linux/sockios.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/etherdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/stat.h>
66#include <linux/init.h>
67
68#include <net/snmp.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/xfrm.h>
73#include <linux/skbuff.h>
74#include <net/sock.h>
75#include <net/arp.h>
76#include <net/icmp.h>
77#include <net/checksum.h>
78#include <net/inetpeer.h>
79#include <net/checksum.h>
80#include <linux/igmp.h>
81#include <linux/netfilter_ipv4.h>
82#include <linux/netfilter_bridge.h>
83#include <linux/mroute.h>
84#include <linux/netlink.h>
85#include <linux/tcp.h>
86
/*
 * Default IP time-to-live setting, initialised to IPDEFTTL.
 * NOTE(review): not read anywhere in this part of the file; presumably
 * exposed through sysctl and consumed elsewhere -- confirm.
 */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
88
89
90__inline__ void ip_send_check(struct iphdr *iph)
91{
92 iph->check = 0;
93 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
94}
95
96
97static int ip_dev_loopback_xmit(struct sk_buff *newskb)
98{
99 newskb->mac.raw = newskb->data;
100 __skb_pull(newskb, newskb->nh.raw - newskb->data);
101 newskb->pkt_type = PACKET_LOOPBACK;
102 newskb->ip_summed = CHECKSUM_UNNECESSARY;
103 BUG_TRAP(newskb->dst);
104 netif_rx(newskb);
105 return 0;
106}
107
108static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
109{
110 int ttl = inet->uc_ttl;
111
112 if (ttl < 0)
113 ttl = dst_metric(dst, RTAX_HOPLIMIT);
114 return ttl;
115}
116
117
118
119
120
121int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
122 __be32 saddr, __be32 daddr, struct ip_options *opt)
123{
124 struct inet_sock *inet = inet_sk(sk);
125 struct rtable *rt = (struct rtable *)skb->dst;
126 struct iphdr *iph;
127
128
129 if (opt)
130 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
131 else
132 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
133
134 iph->version = 4;
135 iph->ihl = 5;
136 iph->tos = inet->tos;
137 if (ip_dont_fragment(sk, &rt->u.dst))
138 iph->frag_off = htons(IP_DF);
139 else
140 iph->frag_off = 0;
141 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
142 iph->daddr = rt->rt_dst;
143 iph->saddr = rt->rt_src;
144 iph->protocol = sk->sk_protocol;
145 iph->tot_len = htons(skb->len);
146 ip_select_ident(iph, &rt->u.dst, sk);
147 skb->nh.iph = iph;
148
149 if (opt && opt->optlen) {
150 iph->ihl += opt->optlen>>2;
151 ip_options_build(skb, opt, daddr, rt, 0);
152 }
153 ip_send_check(iph);
154
155 skb->priority = sk->sk_priority;
156
157
158 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
159 dst_output);
160}
161
162EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
163
164static inline int ip_finish_output2(struct sk_buff *skb)
165{
166 struct dst_entry *dst = skb->dst;
167 struct net_device *dev = dst->dev;
168 int hh_len = LL_RESERVED_SPACE(dev);
169
170
171 if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
172 struct sk_buff *skb2;
173
174 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
175 if (skb2 == NULL) {
176 kfree_skb(skb);
177 return -ENOMEM;
178 }
179 if (skb->sk)
180 skb_set_owner_w(skb2, skb->sk);
181 kfree_skb(skb);
182 skb = skb2;
183 }
184
185 if (dst->hh)
186 return neigh_hh_output(dst->hh, skb);
187 else if (dst->neighbour)
188 return dst->neighbour->output(skb);
189
190 if (net_ratelimit())
191 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
192 kfree_skb(skb);
193 return -EINVAL;
194}
195
196static inline int ip_finish_output(struct sk_buff *skb)
197{
198#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
199
200 if (skb->dst->xfrm != NULL) {
201 IPCB(skb)->flags |= IPSKB_REROUTED;
202 return dst_output(skb);
203 }
204#endif
205 if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
206 return ip_fragment(skb, ip_finish_output2);
207 else
208 return ip_finish_output2(skb);
209}
210
211int ip_mc_output(struct sk_buff *skb)
212{
213 struct sock *sk = skb->sk;
214 struct rtable *rt = (struct rtable*)skb->dst;
215 struct net_device *dev = rt->u.dst.dev;
216
217
218
219
220 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
221
222 skb->dev = dev;
223 skb->protocol = htons(ETH_P_IP);
224
225
226
227
228
229 if (rt->rt_flags&RTCF_MULTICAST) {
230 if ((!sk || inet_sk(sk)->mc_loop)
231#ifdef CONFIG_IP_MROUTE
232
233
234
235
236
237
238
239
240 && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
241#endif
242 ) {
243 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
244 if (newskb)
245 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
246 newskb->dev,
247 ip_dev_loopback_xmit);
248 }
249
250
251
252 if (skb->nh.iph->ttl == 0) {
253 kfree_skb(skb);
254 return 0;
255 }
256 }
257
258 if (rt->rt_flags&RTCF_BROADCAST) {
259 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
260 if (newskb)
261 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
262 newskb->dev, ip_dev_loopback_xmit);
263 }
264
265 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev,
266 ip_finish_output,
267 !(IPCB(skb)->flags & IPSKB_REROUTED));
268}
269
270int ip_output(struct sk_buff *skb)
271{
272 struct net_device *dev = skb->dst->dev;
273
274 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
275
276 skb->dev = dev;
277 skb->protocol = htons(ETH_P_IP);
278
279 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
280 ip_finish_output,
281 !(IPCB(skb)->flags & IPSKB_REROUTED));
282}
283
284int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
285{
286 struct sock *sk = skb->sk;
287 struct inet_sock *inet = inet_sk(sk);
288 struct ip_options *opt = inet->opt;
289 struct rtable *rt;
290 struct iphdr *iph;
291
292
293
294
295 rt = (struct rtable *) skb->dst;
296 if (rt != NULL)
297 goto packet_routed;
298
299
300 rt = (struct rtable *)__sk_dst_check(sk, 0);
301 if (rt == NULL) {
302 __be32 daddr;
303
304
305 daddr = inet->daddr;
306 if(opt && opt->srr)
307 daddr = opt->faddr;
308
309 {
310 struct flowi fl = { .oif = sk->sk_bound_dev_if,
311 .nl_u = { .ip4_u =
312 { .daddr = daddr,
313 .saddr = inet->saddr,
314 .tos = RT_CONN_FLAGS(sk) } },
315 .proto = sk->sk_protocol,
316 .uli_u = { .ports =
317 { .sport = inet->sport,
318 .dport = inet->dport } } };
319
320
321
322
323
324 security_sk_classify_flow(sk, &fl);
325 if (ip_route_output_flow(&rt, &fl, sk, 0))
326 goto no_route;
327 }
328 sk_setup_caps(sk, &rt->u.dst);
329 }
330 skb->dst = dst_clone(&rt->u.dst);
331
332packet_routed:
333 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
334 goto no_route;
335
336
337 iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
338 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
339 iph->tot_len = htons(skb->len);
340 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
341 iph->frag_off = htons(IP_DF);
342 else
343 iph->frag_off = 0;
344 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
345 iph->protocol = sk->sk_protocol;
346 iph->saddr = rt->rt_src;
347 iph->daddr = rt->rt_dst;
348 skb->nh.iph = iph;
349
350
351 if (opt && opt->optlen) {
352 iph->ihl += opt->optlen >> 2;
353 ip_options_build(skb, opt, inet->daddr, rt, 0);
354 }
355
356 ip_select_ident_more(iph, &rt->u.dst, sk,
357 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
358
359
360 ip_send_check(iph);
361
362 skb->priority = sk->sk_priority;
363
364 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
365 dst_output);
366
367no_route:
368 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
369 kfree_skb(skb);
370 return -EHOSTUNREACH;
371}
372
373
374static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
375{
376 to->pkt_type = from->pkt_type;
377 to->priority = from->priority;
378 to->protocol = from->protocol;
379 dst_release(to->dst);
380 to->dst = dst_clone(from->dst);
381 to->dev = from->dev;
382 to->mark = from->mark;
383
384
385 IPCB(to)->flags = IPCB(from)->flags;
386
387#ifdef CONFIG_NET_SCHED
388 to->tc_index = from->tc_index;
389#endif
390#ifdef CONFIG_NETFILTER
391
392 nf_conntrack_put(to->nfct);
393 to->nfct = from->nfct;
394 nf_conntrack_get(to->nfct);
395 to->nfctinfo = from->nfctinfo;
396#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
397 to->ipvs_property = from->ipvs_property;
398#endif
399#ifdef CONFIG_BRIDGE_NETFILTER
400 nf_bridge_put(to->nf_bridge);
401 to->nf_bridge = from->nf_bridge;
402 nf_bridge_get(to->nf_bridge);
403#endif
404#endif
405 skb_copy_secmark(to, from);
406}
407
408
409
410
411
412
413
414
415int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
416{
417 struct iphdr *iph;
418 int raw = 0;
419 int ptr;
420 struct net_device *dev;
421 struct sk_buff *skb2;
422 unsigned int mtu, hlen, left, len, ll_rs, pad;
423 int offset;
424 __be16 not_last_frag;
425 struct rtable *rt = (struct rtable*)skb->dst;
426 int err = 0;
427
428 dev = rt->u.dst.dev;
429
430
431
432
433
434 iph = skb->nh.iph;
435
436 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
437 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
438 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
439 htonl(dst_mtu(&rt->u.dst)));
440 kfree_skb(skb);
441 return -EMSGSIZE;
442 }
443
444
445
446
447
448 hlen = iph->ihl * 4;
449 mtu = dst_mtu(&rt->u.dst) - hlen;
450 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
451
452
453
454
455
456
457
458
459 if (skb_shinfo(skb)->frag_list) {
460 struct sk_buff *frag;
461 int first_len = skb_pagelen(skb);
462
463 if (first_len - hlen > mtu ||
464 ((first_len - hlen) & 7) ||
465 (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
466 skb_cloned(skb))
467 goto slow_path;
468
469 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
470
471 if (frag->len > mtu ||
472 ((frag->len & 7) && frag->next) ||
473 skb_headroom(frag) < hlen)
474 goto slow_path;
475
476
477 if (skb_shared(frag))
478 goto slow_path;
479
480 BUG_ON(frag->sk);
481 if (skb->sk) {
482 sock_hold(skb->sk);
483 frag->sk = skb->sk;
484 frag->destructor = sock_wfree;
485 skb->truesize -= frag->truesize;
486 }
487 }
488
489
490
491 err = 0;
492 offset = 0;
493 frag = skb_shinfo(skb)->frag_list;
494 skb_shinfo(skb)->frag_list = NULL;
495 skb->data_len = first_len - skb_headlen(skb);
496 skb->len = first_len;
497 iph->tot_len = htons(first_len);
498 iph->frag_off = htons(IP_MF);
499 ip_send_check(iph);
500
501 for (;;) {
502
503
504 if (frag) {
505 frag->ip_summed = CHECKSUM_NONE;
506 frag->h.raw = frag->data;
507 frag->nh.raw = __skb_push(frag, hlen);
508 memcpy(frag->nh.raw, iph, hlen);
509 iph = frag->nh.iph;
510 iph->tot_len = htons(frag->len);
511 ip_copy_metadata(frag, skb);
512 if (offset == 0)
513 ip_options_fragment(frag);
514 offset += skb->len - hlen;
515 iph->frag_off = htons(offset>>3);
516 if (frag->next != NULL)
517 iph->frag_off |= htons(IP_MF);
518
519 ip_send_check(iph);
520 }
521
522 err = output(skb);
523
524 if (!err)
525 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
526 if (err || !frag)
527 break;
528
529 skb = frag;
530 frag = skb->next;
531 skb->next = NULL;
532 }
533
534 if (err == 0) {
535 IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
536 return 0;
537 }
538
539 while (frag) {
540 skb = frag->next;
541 kfree_skb(frag);
542 frag = skb;
543 }
544 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
545 return err;
546 }
547
548slow_path:
549 left = skb->len - hlen;
550 ptr = raw + hlen;
551
552
553
554
555 pad = nf_bridge_pad(skb);
556 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
557 mtu -= pad;
558
559
560
561
562
563 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
564 not_last_frag = iph->frag_off & htons(IP_MF);
565
566
567
568
569
570 while(left > 0) {
571 len = left;
572
573 if (len > mtu)
574 len = mtu;
575
576
577 if (len < left) {
578 len &= ~7;
579 }
580
581
582
583
584 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
585 NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
586 err = -ENOMEM;
587 goto fail;
588 }
589
590
591
592
593
594 ip_copy_metadata(skb2, skb);
595 skb_reserve(skb2, ll_rs);
596 skb_put(skb2, len + hlen);
597 skb2->nh.raw = skb2->data;
598 skb2->h.raw = skb2->data + hlen;
599
600
601
602
603
604
605 if (skb->sk)
606 skb_set_owner_w(skb2, skb->sk);
607
608
609
610
611
612 memcpy(skb2->nh.raw, skb->data, hlen);
613
614
615
616
617 if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
618 BUG();
619 left -= len;
620
621
622
623
624 iph = skb2->nh.iph;
625 iph->frag_off = htons((offset >> 3));
626
627
628
629
630
631
632
633 if (offset == 0)
634 ip_options_fragment(skb);
635
636
637
638
639
640 if (left > 0 || not_last_frag)
641 iph->frag_off |= htons(IP_MF);
642 ptr += len;
643 offset += len;
644
645
646
647
648 iph->tot_len = htons(len + hlen);
649
650 ip_send_check(iph);
651
652 err = output(skb2);
653 if (err)
654 goto fail;
655
656 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
657 }
658 kfree_skb(skb);
659 IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
660 return err;
661
662fail:
663 kfree_skb(skb);
664 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
665 return err;
666}
667
668EXPORT_SYMBOL(ip_fragment);
669
670int
671ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
672{
673 struct iovec *iov = from;
674
675 if (skb->ip_summed == CHECKSUM_PARTIAL) {
676 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
677 return -EFAULT;
678 } else {
679 __wsum csum = 0;
680 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
681 return -EFAULT;
682 skb->csum = csum_block_add(skb->csum, csum, odd);
683 }
684 return 0;
685}
686
687static inline __wsum
688csum_page(struct page *page, int offset, int copy)
689{
690 char *kaddr;
691 __wsum csum;
692 kaddr = kmap(page);
693 csum = csum_partial(kaddr + offset, copy, 0);
694 kunmap(page);
695 return csum;
696}
697
698static inline int ip_ufo_append_data(struct sock *sk,
699 int getfrag(void *from, char *to, int offset, int len,
700 int odd, struct sk_buff *skb),
701 void *from, int length, int hh_len, int fragheaderlen,
702 int transhdrlen, int mtu,unsigned int flags)
703{
704 struct sk_buff *skb;
705 int err;
706
707
708
709
710
711 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
712 skb = sock_alloc_send_skb(sk,
713 hh_len + fragheaderlen + transhdrlen + 20,
714 (flags & MSG_DONTWAIT), &err);
715
716 if (skb == NULL)
717 return err;
718
719
720 skb_reserve(skb, hh_len);
721
722
723 skb_put(skb,fragheaderlen + transhdrlen);
724
725
726 skb->nh.raw = skb->data;
727
728
729 skb->h.raw = skb->data + fragheaderlen;
730
731 skb->ip_summed = CHECKSUM_PARTIAL;
732 skb->csum = 0;
733 sk->sk_sndmsg_off = 0;
734 }
735
736 err = skb_append_datato_frags(sk,skb, getfrag, from,
737 (length - transhdrlen));
738 if (!err) {
739
740 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
741 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
742 __skb_queue_tail(&sk->sk_write_queue, skb);
743
744 return 0;
745 }
746
747
748
749 kfree_skb(skb);
750 return err;
751}
752
753
754
755
756
757
758
759
760
761
762
763
764int ip_append_data(struct sock *sk,
765 int getfrag(void *from, char *to, int offset, int len,
766 int odd, struct sk_buff *skb),
767 void *from, int length, int transhdrlen,
768 struct ipcm_cookie *ipc, struct rtable *rt,
769 unsigned int flags)
770{
771 struct inet_sock *inet = inet_sk(sk);
772 struct sk_buff *skb;
773
774 struct ip_options *opt = NULL;
775 int hh_len;
776 int exthdrlen;
777 int mtu;
778 int copy;
779 int err;
780 int offset = 0;
781 unsigned int maxfraglen, fragheaderlen;
782 int csummode = CHECKSUM_NONE;
783
784 if (flags&MSG_PROBE)
785 return 0;
786
787 if (skb_queue_empty(&sk->sk_write_queue)) {
788
789
790
791 opt = ipc->opt;
792 if (opt) {
793 if (inet->cork.opt == NULL) {
794 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
795 if (unlikely(inet->cork.opt == NULL))
796 return -ENOBUFS;
797 }
798 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
799 inet->cork.flags |= IPCORK_OPT;
800 inet->cork.addr = ipc->addr;
801 }
802 dst_hold(&rt->u.dst);
803 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
804 inet->cork.rt = rt;
805 inet->cork.length = 0;
806 sk->sk_sndmsg_page = NULL;
807 sk->sk_sndmsg_off = 0;
808 if ((exthdrlen = rt->u.dst.header_len) != 0) {
809 length += exthdrlen;
810 transhdrlen += exthdrlen;
811 }
812 } else {
813 rt = inet->cork.rt;
814 if (inet->cork.flags & IPCORK_OPT)
815 opt = inet->cork.opt;
816
817 transhdrlen = 0;
818 exthdrlen = 0;
819 mtu = inet->cork.fragsize;
820 }
821 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
822
823 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
824 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
825
826 if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
827 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
828 return -EMSGSIZE;
829 }
830
831
832
833
834
835 if (transhdrlen &&
836 length + fragheaderlen <= mtu &&
837 rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
838 !exthdrlen)
839 csummode = CHECKSUM_PARTIAL;
840
841 inet->cork.length += length;
842 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
843 (rt->u.dst.dev->features & NETIF_F_UFO)) {
844
845 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
846 fragheaderlen, transhdrlen, mtu,
847 flags);
848 if (err)
849 goto error;
850 return 0;
851 }
852
853
854
855
856
857
858
859
860 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
861 goto alloc_new_skb;
862
863 while (length > 0) {
864
865 copy = mtu - skb->len;
866 if (copy < length)
867 copy = maxfraglen - skb->len;
868 if (copy <= 0) {
869 char *data;
870 unsigned int datalen;
871 unsigned int fraglen;
872 unsigned int fraggap;
873 unsigned int alloclen;
874 struct sk_buff *skb_prev;
875alloc_new_skb:
876 skb_prev = skb;
877 if (skb_prev)
878 fraggap = skb_prev->len - maxfraglen;
879 else
880 fraggap = 0;
881
882
883
884
885
886 datalen = length + fraggap;
887 if (datalen > mtu - fragheaderlen)
888 datalen = maxfraglen - fragheaderlen;
889 fraglen = datalen + fragheaderlen;
890
891 if ((flags & MSG_MORE) &&
892 !(rt->u.dst.dev->features&NETIF_F_SG))
893 alloclen = mtu;
894 else
895 alloclen = datalen + fragheaderlen;
896
897
898
899
900
901
902 if (datalen == length + fraggap)
903 alloclen += rt->u.dst.trailer_len;
904
905 if (transhdrlen) {
906 skb = sock_alloc_send_skb(sk,
907 alloclen + hh_len + 15,
908 (flags & MSG_DONTWAIT), &err);
909 } else {
910 skb = NULL;
911 if (atomic_read(&sk->sk_wmem_alloc) <=
912 2 * sk->sk_sndbuf)
913 skb = sock_wmalloc(sk,
914 alloclen + hh_len + 15, 1,
915 sk->sk_allocation);
916 if (unlikely(skb == NULL))
917 err = -ENOBUFS;
918 }
919 if (skb == NULL)
920 goto error;
921
922
923
924
925 skb->ip_summed = csummode;
926 skb->csum = 0;
927 skb_reserve(skb, hh_len);
928
929
930
931
932 data = skb_put(skb, fraglen);
933 skb->nh.raw = data + exthdrlen;
934 data += fragheaderlen;
935 skb->h.raw = data + exthdrlen;
936
937 if (fraggap) {
938 skb->csum = skb_copy_and_csum_bits(
939 skb_prev, maxfraglen,
940 data + transhdrlen, fraggap, 0);
941 skb_prev->csum = csum_sub(skb_prev->csum,
942 skb->csum);
943 data += fraggap;
944 pskb_trim_unique(skb_prev, maxfraglen);
945 }
946
947 copy = datalen - transhdrlen - fraggap;
948 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
949 err = -EFAULT;
950 kfree_skb(skb);
951 goto error;
952 }
953
954 offset += copy;
955 length -= datalen - fraggap;
956 transhdrlen = 0;
957 exthdrlen = 0;
958 csummode = CHECKSUM_NONE;
959
960
961
962
963 __skb_queue_tail(&sk->sk_write_queue, skb);
964 continue;
965 }
966
967 if (copy > length)
968 copy = length;
969
970 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
971 unsigned int off;
972
973 off = skb->len;
974 if (getfrag(from, skb_put(skb, copy),
975 offset, copy, off, skb) < 0) {
976 __skb_trim(skb, off);
977 err = -EFAULT;
978 goto error;
979 }
980 } else {
981 int i = skb_shinfo(skb)->nr_frags;
982 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
983 struct page *page = sk->sk_sndmsg_page;
984 int off = sk->sk_sndmsg_off;
985 unsigned int left;
986
987 if (page && (left = PAGE_SIZE - off) > 0) {
988 if (copy >= left)
989 copy = left;
990 if (page != frag->page) {
991 if (i == MAX_SKB_FRAGS) {
992 err = -EMSGSIZE;
993 goto error;
994 }
995 get_page(page);
996 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
997 frag = &skb_shinfo(skb)->frags[i];
998 }
999 } else if (i < MAX_SKB_FRAGS) {
1000 if (copy > PAGE_SIZE)
1001 copy = PAGE_SIZE;
1002 page = alloc_pages(sk->sk_allocation, 0);
1003 if (page == NULL) {
1004 err = -ENOMEM;
1005 goto error;
1006 }
1007 sk->sk_sndmsg_page = page;
1008 sk->sk_sndmsg_off = 0;
1009
1010 skb_fill_page_desc(skb, i, page, 0, 0);
1011 frag = &skb_shinfo(skb)->frags[i];
1012 skb->truesize += PAGE_SIZE;
1013 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1014 } else {
1015 err = -EMSGSIZE;
1016 goto error;
1017 }
1018 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1019 err = -EFAULT;
1020 goto error;
1021 }
1022 sk->sk_sndmsg_off += copy;
1023 frag->size += copy;
1024 skb->len += copy;
1025 skb->data_len += copy;
1026 }
1027 offset += copy;
1028 length -= copy;
1029 }
1030
1031 return 0;
1032
1033error:
1034 inet->cork.length -= length;
1035 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1036 return err;
1037}
1038
1039ssize_t ip_append_page(struct sock *sk, struct page *page,
1040 int offset, size_t size, int flags)
1041{
1042 struct inet_sock *inet = inet_sk(sk);
1043 struct sk_buff *skb;
1044 struct rtable *rt;
1045 struct ip_options *opt = NULL;
1046 int hh_len;
1047 int mtu;
1048 int len;
1049 int err;
1050 unsigned int maxfraglen, fragheaderlen, fraggap;
1051
1052 if (inet->hdrincl)
1053 return -EPERM;
1054
1055 if (flags&MSG_PROBE)
1056 return 0;
1057
1058 if (skb_queue_empty(&sk->sk_write_queue))
1059 return -EINVAL;
1060
1061 rt = inet->cork.rt;
1062 if (inet->cork.flags & IPCORK_OPT)
1063 opt = inet->cork.opt;
1064
1065 if (!(rt->u.dst.dev->features&NETIF_F_SG))
1066 return -EOPNOTSUPP;
1067
1068 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1069 mtu = inet->cork.fragsize;
1070
1071 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1072 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1073
1074 if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1075 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1076 return -EMSGSIZE;
1077 }
1078
1079 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1080 return -EINVAL;
1081
1082 inet->cork.length += size;
1083 if ((sk->sk_protocol == IPPROTO_UDP) &&
1084 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1085 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1086 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1087 }
1088
1089
1090 while (size > 0) {
1091 int i;
1092
1093 if (skb_is_gso(skb))
1094 len = size;
1095 else {
1096
1097
1098 len = mtu - skb->len;
1099 if (len < size)
1100 len = maxfraglen - skb->len;
1101 }
1102 if (len <= 0) {
1103 struct sk_buff *skb_prev;
1104 char *data;
1105 struct iphdr *iph;
1106 int alloclen;
1107
1108 skb_prev = skb;
1109 fraggap = skb_prev->len - maxfraglen;
1110
1111 alloclen = fragheaderlen + hh_len + fraggap + 15;
1112 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1113 if (unlikely(!skb)) {
1114 err = -ENOBUFS;
1115 goto error;
1116 }
1117
1118
1119
1120
1121 skb->ip_summed = CHECKSUM_NONE;
1122 skb->csum = 0;
1123 skb_reserve(skb, hh_len);
1124
1125
1126
1127
1128 data = skb_put(skb, fragheaderlen + fraggap);
1129 skb->nh.iph = iph = (struct iphdr *)data;
1130 data += fragheaderlen;
1131 skb->h.raw = data;
1132
1133 if (fraggap) {
1134 skb->csum = skb_copy_and_csum_bits(
1135 skb_prev, maxfraglen,
1136 data, fraggap, 0);
1137 skb_prev->csum = csum_sub(skb_prev->csum,
1138 skb->csum);
1139 pskb_trim_unique(skb_prev, maxfraglen);
1140 }
1141
1142
1143
1144
1145 __skb_queue_tail(&sk->sk_write_queue, skb);
1146 continue;
1147 }
1148
1149 i = skb_shinfo(skb)->nr_frags;
1150 if (len > size)
1151 len = size;
1152 if (skb_can_coalesce(skb, i, page, offset)) {
1153 skb_shinfo(skb)->frags[i-1].size += len;
1154 } else if (i < MAX_SKB_FRAGS) {
1155 get_page(page);
1156 skb_fill_page_desc(skb, i, page, offset, len);
1157 } else {
1158 err = -EMSGSIZE;
1159 goto error;
1160 }
1161
1162 if (skb->ip_summed == CHECKSUM_NONE) {
1163 __wsum csum;
1164 csum = csum_page(page, offset, len);
1165 skb->csum = csum_block_add(skb->csum, csum, skb->len);
1166 }
1167
1168 skb->len += len;
1169 skb->data_len += len;
1170 offset += len;
1171 size -= len;
1172 }
1173 return 0;
1174
1175error:
1176 inet->cork.length -= size;
1177 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1178 return err;
1179}
1180
1181
1182
1183
1184
1185int ip_push_pending_frames(struct sock *sk)
1186{
1187 struct sk_buff *skb, *tmp_skb;
1188 struct sk_buff **tail_skb;
1189 struct inet_sock *inet = inet_sk(sk);
1190 struct ip_options *opt = NULL;
1191 struct rtable *rt = inet->cork.rt;
1192 struct iphdr *iph;
1193 __be16 df = 0;
1194 __u8 ttl;
1195 int err = 0;
1196
1197 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1198 goto out;
1199 tail_skb = &(skb_shinfo(skb)->frag_list);
1200
1201
1202 if (skb->data < skb->nh.raw)
1203 __skb_pull(skb, skb->nh.raw - skb->data);
1204 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1205 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1206 *tail_skb = tmp_skb;
1207 tail_skb = &(tmp_skb->next);
1208 skb->len += tmp_skb->len;
1209 skb->data_len += tmp_skb->len;
1210 skb->truesize += tmp_skb->truesize;
1211 __sock_put(tmp_skb->sk);
1212 tmp_skb->destructor = NULL;
1213 tmp_skb->sk = NULL;
1214 }
1215
1216
1217
1218
1219
1220 if (inet->pmtudisc != IP_PMTUDISC_DO)
1221 skb->local_df = 1;
1222
1223
1224
1225
1226 if (inet->pmtudisc == IP_PMTUDISC_DO ||
1227 (skb->len <= dst_mtu(&rt->u.dst) &&
1228 ip_dont_fragment(sk, &rt->u.dst)))
1229 df = htons(IP_DF);
1230
1231 if (inet->cork.flags & IPCORK_OPT)
1232 opt = inet->cork.opt;
1233
1234 if (rt->rt_type == RTN_MULTICAST)
1235 ttl = inet->mc_ttl;
1236 else
1237 ttl = ip_select_ttl(inet, &rt->u.dst);
1238
1239 iph = (struct iphdr *)skb->data;
1240 iph->version = 4;
1241 iph->ihl = 5;
1242 if (opt) {
1243 iph->ihl += opt->optlen>>2;
1244 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1245 }
1246 iph->tos = inet->tos;
1247 iph->tot_len = htons(skb->len);
1248 iph->frag_off = df;
1249 ip_select_ident(iph, &rt->u.dst, sk);
1250 iph->ttl = ttl;
1251 iph->protocol = sk->sk_protocol;
1252 iph->saddr = rt->rt_src;
1253 iph->daddr = rt->rt_dst;
1254 ip_send_check(iph);
1255
1256 skb->priority = sk->sk_priority;
1257 skb->dst = dst_clone(&rt->u.dst);
1258
1259
1260 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1261 skb->dst->dev, dst_output);
1262 if (err) {
1263 if (err > 0)
1264 err = inet->recverr ? net_xmit_errno(err) : 0;
1265 if (err)
1266 goto error;
1267 }
1268
1269out:
1270 inet->cork.flags &= ~IPCORK_OPT;
1271 kfree(inet->cork.opt);
1272 inet->cork.opt = NULL;
1273 if (inet->cork.rt) {
1274 ip_rt_put(inet->cork.rt);
1275 inet->cork.rt = NULL;
1276 }
1277 return err;
1278
1279error:
1280 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1281 goto out;
1282}
1283
1284
1285
1286
1287void ip_flush_pending_frames(struct sock *sk)
1288{
1289 struct inet_sock *inet = inet_sk(sk);
1290 struct sk_buff *skb;
1291
1292 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1293 kfree_skb(skb);
1294
1295 inet->cork.flags &= ~IPCORK_OPT;
1296 kfree(inet->cork.opt);
1297 inet->cork.opt = NULL;
1298 if (inet->cork.rt) {
1299 ip_rt_put(inet->cork.rt);
1300 inet->cork.rt = NULL;
1301 }
1302}
1303
1304
1305
1306
1307
1308static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1309 int len, int odd, struct sk_buff *skb)
1310{
1311 __wsum csum;
1312
1313 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1314 skb->csum = csum_block_add(skb->csum, csum, odd);
1315 return 0;
1316}
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1328 unsigned int len)
1329{
1330 struct inet_sock *inet = inet_sk(sk);
1331 struct {
1332 struct ip_options opt;
1333 char data[40];
1334 } replyopts;
1335 struct ipcm_cookie ipc;
1336 __be32 daddr;
1337 struct rtable *rt = (struct rtable*)skb->dst;
1338
1339 if (ip_options_echo(&replyopts.opt, skb))
1340 return;
1341
1342 daddr = ipc.addr = rt->rt_src;
1343 ipc.opt = NULL;
1344
1345 if (replyopts.opt.optlen) {
1346 ipc.opt = &replyopts.opt;
1347
1348 if (ipc.opt->srr)
1349 daddr = replyopts.opt.faddr;
1350 }
1351
1352 {
1353 struct flowi fl = { .nl_u = { .ip4_u =
1354 { .daddr = daddr,
1355 .saddr = rt->rt_spec_dst,
1356 .tos = RT_TOS(skb->nh.iph->tos) } },
1357
1358 .uli_u = { .ports =
1359 { .sport = skb->h.th->dest,
1360 .dport = skb->h.th->source } },
1361 .proto = sk->sk_protocol };
1362 security_skb_classify_flow(skb, &fl);
1363 if (ip_route_output_key(&rt, &fl))
1364 return;
1365 }
1366
1367
1368
1369
1370
1371
1372
1373 bh_lock_sock(sk);
1374 inet->tos = skb->nh.iph->tos;
1375 sk->sk_priority = skb->priority;
1376 sk->sk_protocol = skb->nh.iph->protocol;
1377 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1378 &ipc, rt, MSG_DONTWAIT);
1379 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1380 if (arg->csumoffset >= 0)
1381 *((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1382 skb->ip_summed = CHECKSUM_NONE;
1383 ip_push_pending_frames(sk);
1384 }
1385
1386 bh_unlock_sock(sk);
1387
1388 ip_rt_put(rt);
1389}
1390
/*
 * Boot-time initialisation of the IP layer: sets up routing and the inet
 * peer storage, and -- when both multicast and procfs are configured --
 * the IGMP multicast proc entries.
 */
void __init ip_init(void)
{
	ip_rt_init();
	inet_initpeers();

#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
	igmp_mc_proc_init();
#endif
}
1400
/* Output-path helpers exported for use outside this file. */
EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_send_check);
1404