1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/mm.h>
53#include <linux/string.h>
54#include <linux/errno.h>
55#include <linux/highmem.h>
56
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/in.h>
60#include <linux/inet.h>
61#include <linux/netdevice.h>
62#include <linux/etherdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/stat.h>
65#include <linux/init.h>
66
67#include <net/snmp.h>
68#include <net/ip.h>
69#include <net/protocol.h>
70#include <net/route.h>
71#include <net/xfrm.h>
72#include <linux/skbuff.h>
73#include <net/sock.h>
74#include <net/arp.h>
75#include <net/icmp.h>
76#include <net/checksum.h>
77#include <net/inetpeer.h>
78#include <linux/igmp.h>
79#include <linux/netfilter_ipv4.h>
80#include <linux/netfilter_bridge.h>
81#include <linux/mroute.h>
82#include <linux/netlink.h>
83#include <linux/tcp.h>
84
85int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
86
87
88__inline__ void ip_send_check(struct iphdr *iph)
89{
90 iph->check = 0;
91 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
92}
93
94int __ip_local_out(struct sk_buff *skb)
95{
96 struct iphdr *iph = ip_hdr(skb);
97
98 iph->tot_len = htons(skb->len);
99 ip_send_check(iph);
100 return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
101 dst_output);
102}
103
/*
 *	Send a locally generated packet.
 *
 *	When __ip_local_out() returns 1 the packet is passed on to
 *	dst_output() and that result is returned; any other value is
 *	returned to the caller unchanged.
 */
int ip_local_out(struct sk_buff *skb)
{
	int rc = __ip_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip_local_out);
115
116
117static int ip_dev_loopback_xmit(struct sk_buff *newskb)
118{
119 skb_reset_mac_header(newskb);
120 __skb_pull(newskb, skb_network_offset(newskb));
121 newskb->pkt_type = PACKET_LOOPBACK;
122 newskb->ip_summed = CHECKSUM_UNNECESSARY;
123 BUG_TRAP(newskb->dst);
124 netif_rx(newskb);
125 return 0;
126}
127
128static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
129{
130 int ttl = inet->uc_ttl;
131
132 if (ttl < 0)
133 ttl = dst_metric(dst, RTAX_HOPLIMIT);
134 return ttl;
135}
136
137
138
139
140
141int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
142 __be32 saddr, __be32 daddr, struct ip_options *opt)
143{
144 struct inet_sock *inet = inet_sk(sk);
145 struct rtable *rt = (struct rtable *)skb->dst;
146 struct iphdr *iph;
147
148
149 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
150 skb_reset_network_header(skb);
151 iph = ip_hdr(skb);
152 iph->version = 4;
153 iph->ihl = 5;
154 iph->tos = inet->tos;
155 if (ip_dont_fragment(sk, &rt->u.dst))
156 iph->frag_off = htons(IP_DF);
157 else
158 iph->frag_off = 0;
159 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
160 iph->daddr = rt->rt_dst;
161 iph->saddr = rt->rt_src;
162 iph->protocol = sk->sk_protocol;
163 ip_select_ident(iph, &rt->u.dst, sk);
164
165 if (opt && opt->optlen) {
166 iph->ihl += opt->optlen>>2;
167 ip_options_build(skb, opt, daddr, rt, 0);
168 }
169
170 skb->priority = sk->sk_priority;
171 skb->mark = sk->sk_mark;
172
173
174 return ip_local_out(skb);
175}
176
177EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
178
179static inline int ip_finish_output2(struct sk_buff *skb)
180{
181 struct dst_entry *dst = skb->dst;
182 struct rtable *rt = (struct rtable *)dst;
183 struct net_device *dev = dst->dev;
184 unsigned int hh_len = LL_RESERVED_SPACE(dev);
185
186 if (rt->rt_type == RTN_MULTICAST)
187 IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
188 else if (rt->rt_type == RTN_BROADCAST)
189 IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
190
191
192 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
193 struct sk_buff *skb2;
194
195 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
196 if (skb2 == NULL) {
197 kfree_skb(skb);
198 return -ENOMEM;
199 }
200 if (skb->sk)
201 skb_set_owner_w(skb2, skb->sk);
202 kfree_skb(skb);
203 skb = skb2;
204 }
205
206 if (dst->hh)
207 return neigh_hh_output(dst->hh, skb);
208 else if (dst->neighbour)
209 return dst->neighbour->output(skb);
210
211 if (net_ratelimit())
212 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
213 kfree_skb(skb);
214 return -EINVAL;
215}
216
217static inline int ip_skb_dst_mtu(struct sk_buff *skb)
218{
219 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
220
221 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
222 skb->dst->dev->mtu : dst_mtu(skb->dst);
223}
224
225static int ip_finish_output(struct sk_buff *skb)
226{
227#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
228
229 if (skb->dst->xfrm != NULL) {
230 IPCB(skb)->flags |= IPSKB_REROUTED;
231 return dst_output(skb);
232 }
233#endif
234 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
235 return ip_fragment(skb, ip_finish_output2);
236 else
237 return ip_finish_output2(skb);
238}
239
240int ip_mc_output(struct sk_buff *skb)
241{
242 struct sock *sk = skb->sk;
243 struct rtable *rt = (struct rtable*)skb->dst;
244 struct net_device *dev = rt->u.dst.dev;
245
246
247
248
249 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
250
251 skb->dev = dev;
252 skb->protocol = htons(ETH_P_IP);
253
254
255
256
257
258 if (rt->rt_flags&RTCF_MULTICAST) {
259 if ((!sk || inet_sk(sk)->mc_loop)
260#ifdef CONFIG_IP_MROUTE
261
262
263
264
265
266
267
268
269 && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
270#endif
271 ) {
272 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
273 if (newskb)
274 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
275 NULL, newskb->dev,
276 ip_dev_loopback_xmit);
277 }
278
279
280
281 if (ip_hdr(skb)->ttl == 0) {
282 kfree_skb(skb);
283 return 0;
284 }
285 }
286
287 if (rt->rt_flags&RTCF_BROADCAST) {
288 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
289 if (newskb)
290 NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
291 newskb->dev, ip_dev_loopback_xmit);
292 }
293
294 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
295 ip_finish_output,
296 !(IPCB(skb)->flags & IPSKB_REROUTED));
297}
298
299int ip_output(struct sk_buff *skb)
300{
301 struct net_device *dev = skb->dst->dev;
302
303 IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
304
305 skb->dev = dev;
306 skb->protocol = htons(ETH_P_IP);
307
308 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
309 ip_finish_output,
310 !(IPCB(skb)->flags & IPSKB_REROUTED));
311}
312
313int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
314{
315 struct sock *sk = skb->sk;
316 struct inet_sock *inet = inet_sk(sk);
317 struct ip_options *opt = inet->opt;
318 struct rtable *rt;
319 struct iphdr *iph;
320
321
322
323
324 rt = (struct rtable *) skb->dst;
325 if (rt != NULL)
326 goto packet_routed;
327
328
329 rt = (struct rtable *)__sk_dst_check(sk, 0);
330 if (rt == NULL) {
331 __be32 daddr;
332
333
334 daddr = inet->daddr;
335 if(opt && opt->srr)
336 daddr = opt->faddr;
337
338 {
339 struct flowi fl = { .oif = sk->sk_bound_dev_if,
340 .nl_u = { .ip4_u =
341 { .daddr = daddr,
342 .saddr = inet->saddr,
343 .tos = RT_CONN_FLAGS(sk) } },
344 .proto = sk->sk_protocol,
345 .uli_u = { .ports =
346 { .sport = inet->sport,
347 .dport = inet->dport } } };
348
349
350
351
352
353 security_sk_classify_flow(sk, &fl);
354 if (ip_route_output_flow(&init_net, &rt, &fl, sk, 0))
355 goto no_route;
356 }
357 sk_setup_caps(sk, &rt->u.dst);
358 }
359 skb->dst = dst_clone(&rt->u.dst);
360
361packet_routed:
362 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
363 goto no_route;
364
365
366 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
367 skb_reset_network_header(skb);
368 iph = ip_hdr(skb);
369 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
370 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
371 iph->frag_off = htons(IP_DF);
372 else
373 iph->frag_off = 0;
374 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
375 iph->protocol = sk->sk_protocol;
376 iph->saddr = rt->rt_src;
377 iph->daddr = rt->rt_dst;
378
379
380 if (opt && opt->optlen) {
381 iph->ihl += opt->optlen >> 2;
382 ip_options_build(skb, opt, inet->daddr, rt, 0);
383 }
384
385 ip_select_ident_more(iph, &rt->u.dst, sk,
386 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
387
388 skb->priority = sk->sk_priority;
389 skb->mark = sk->sk_mark;
390
391 return ip_local_out(skb);
392
393no_route:
394 IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
395 kfree_skb(skb);
396 return -EHOSTUNREACH;
397}
398
399
400static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
401{
402 to->pkt_type = from->pkt_type;
403 to->priority = from->priority;
404 to->protocol = from->protocol;
405 dst_release(to->dst);
406 to->dst = dst_clone(from->dst);
407 to->dev = from->dev;
408 to->mark = from->mark;
409
410
411 IPCB(to)->flags = IPCB(from)->flags;
412
413#ifdef CONFIG_NET_SCHED
414 to->tc_index = from->tc_index;
415#endif
416 nf_copy(to, from);
417#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
418 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
419 to->nf_trace = from->nf_trace;
420#endif
421#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
422 to->ipvs_property = from->ipvs_property;
423#endif
424 skb_copy_secmark(to, from);
425}
426
427
428
429
430
431
432
433
434int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
435{
436 struct iphdr *iph;
437 int raw = 0;
438 int ptr;
439 struct net_device *dev;
440 struct sk_buff *skb2;
441 unsigned int mtu, hlen, left, len, ll_rs, pad;
442 int offset;
443 __be16 not_last_frag;
444 struct rtable *rt = (struct rtable*)skb->dst;
445 int err = 0;
446
447 dev = rt->u.dst.dev;
448
449
450
451
452
453 iph = ip_hdr(skb);
454
455 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
456 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
457 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
458 htonl(ip_skb_dst_mtu(skb)));
459 kfree_skb(skb);
460 return -EMSGSIZE;
461 }
462
463
464
465
466
467 hlen = iph->ihl * 4;
468 mtu = dst_mtu(&rt->u.dst) - hlen;
469 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
470
471
472
473
474
475
476
477
478 if (skb_shinfo(skb)->frag_list) {
479 struct sk_buff *frag;
480 int first_len = skb_pagelen(skb);
481 int truesizes = 0;
482
483 if (first_len - hlen > mtu ||
484 ((first_len - hlen) & 7) ||
485 (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
486 skb_cloned(skb))
487 goto slow_path;
488
489 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
490
491 if (frag->len > mtu ||
492 ((frag->len & 7) && frag->next) ||
493 skb_headroom(frag) < hlen)
494 goto slow_path;
495
496
497 if (skb_shared(frag))
498 goto slow_path;
499
500 BUG_ON(frag->sk);
501 if (skb->sk) {
502 sock_hold(skb->sk);
503 frag->sk = skb->sk;
504 frag->destructor = sock_wfree;
505 truesizes += frag->truesize;
506 }
507 }
508
509
510
511 err = 0;
512 offset = 0;
513 frag = skb_shinfo(skb)->frag_list;
514 skb_shinfo(skb)->frag_list = NULL;
515 skb->data_len = first_len - skb_headlen(skb);
516 skb->truesize -= truesizes;
517 skb->len = first_len;
518 iph->tot_len = htons(first_len);
519 iph->frag_off = htons(IP_MF);
520 ip_send_check(iph);
521
522 for (;;) {
523
524
525 if (frag) {
526 frag->ip_summed = CHECKSUM_NONE;
527 skb_reset_transport_header(frag);
528 __skb_push(frag, hlen);
529 skb_reset_network_header(frag);
530 memcpy(skb_network_header(frag), iph, hlen);
531 iph = ip_hdr(frag);
532 iph->tot_len = htons(frag->len);
533 ip_copy_metadata(frag, skb);
534 if (offset == 0)
535 ip_options_fragment(frag);
536 offset += skb->len - hlen;
537 iph->frag_off = htons(offset>>3);
538 if (frag->next != NULL)
539 iph->frag_off |= htons(IP_MF);
540
541 ip_send_check(iph);
542 }
543
544 err = output(skb);
545
546 if (!err)
547 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
548 if (err || !frag)
549 break;
550
551 skb = frag;
552 frag = skb->next;
553 skb->next = NULL;
554 }
555
556 if (err == 0) {
557 IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
558 return 0;
559 }
560
561 while (frag) {
562 skb = frag->next;
563 kfree_skb(frag);
564 frag = skb;
565 }
566 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
567 return err;
568 }
569
570slow_path:
571 left = skb->len - hlen;
572 ptr = raw + hlen;
573
574
575
576
577 pad = nf_bridge_pad(skb);
578 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
579 mtu -= pad;
580
581
582
583
584
585 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
586 not_last_frag = iph->frag_off & htons(IP_MF);
587
588
589
590
591
592 while (left > 0) {
593 len = left;
594
595 if (len > mtu)
596 len = mtu;
597
598
599 if (len < left) {
600 len &= ~7;
601 }
602
603
604
605
606 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
607 NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
608 err = -ENOMEM;
609 goto fail;
610 }
611
612
613
614
615
616 ip_copy_metadata(skb2, skb);
617 skb_reserve(skb2, ll_rs);
618 skb_put(skb2, len + hlen);
619 skb_reset_network_header(skb2);
620 skb2->transport_header = skb2->network_header + hlen;
621
622
623
624
625
626
627 if (skb->sk)
628 skb_set_owner_w(skb2, skb->sk);
629
630
631
632
633
634 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
635
636
637
638
639 if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
640 BUG();
641 left -= len;
642
643
644
645
646 iph = ip_hdr(skb2);
647 iph->frag_off = htons((offset >> 3));
648
649
650
651
652
653
654
655 if (offset == 0)
656 ip_options_fragment(skb);
657
658
659
660
661
662 if (left > 0 || not_last_frag)
663 iph->frag_off |= htons(IP_MF);
664 ptr += len;
665 offset += len;
666
667
668
669
670 iph->tot_len = htons(len + hlen);
671
672 ip_send_check(iph);
673
674 err = output(skb2);
675 if (err)
676 goto fail;
677
678 IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
679 }
680 kfree_skb(skb);
681 IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
682 return err;
683
684fail:
685 kfree_skb(skb);
686 IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
687 return err;
688}
689
690EXPORT_SYMBOL(ip_fragment);
691
692int
693ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
694{
695 struct iovec *iov = from;
696
697 if (skb->ip_summed == CHECKSUM_PARTIAL) {
698 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
699 return -EFAULT;
700 } else {
701 __wsum csum = 0;
702 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
703 return -EFAULT;
704 skb->csum = csum_block_add(skb->csum, csum, odd);
705 }
706 return 0;
707}
708
709static inline __wsum
710csum_page(struct page *page, int offset, int copy)
711{
712 char *kaddr;
713 __wsum csum;
714 kaddr = kmap(page);
715 csum = csum_partial(kaddr + offset, copy, 0);
716 kunmap(page);
717 return csum;
718}
719
720static inline int ip_ufo_append_data(struct sock *sk,
721 int getfrag(void *from, char *to, int offset, int len,
722 int odd, struct sk_buff *skb),
723 void *from, int length, int hh_len, int fragheaderlen,
724 int transhdrlen, int mtu,unsigned int flags)
725{
726 struct sk_buff *skb;
727 int err;
728
729
730
731
732
733 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
734 skb = sock_alloc_send_skb(sk,
735 hh_len + fragheaderlen + transhdrlen + 20,
736 (flags & MSG_DONTWAIT), &err);
737
738 if (skb == NULL)
739 return err;
740
741
742 skb_reserve(skb, hh_len);
743
744
745 skb_put(skb,fragheaderlen + transhdrlen);
746
747
748 skb_reset_network_header(skb);
749
750
751 skb->transport_header = skb->network_header + fragheaderlen;
752
753 skb->ip_summed = CHECKSUM_PARTIAL;
754 skb->csum = 0;
755 sk->sk_sndmsg_off = 0;
756 }
757
758 err = skb_append_datato_frags(sk,skb, getfrag, from,
759 (length - transhdrlen));
760 if (!err) {
761
762 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
763 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
764 __skb_queue_tail(&sk->sk_write_queue, skb);
765
766 return 0;
767 }
768
769
770
771 kfree_skb(skb);
772 return err;
773}
774
775
776
777
778
779
780
781
782
783
784
785
786int ip_append_data(struct sock *sk,
787 int getfrag(void *from, char *to, int offset, int len,
788 int odd, struct sk_buff *skb),
789 void *from, int length, int transhdrlen,
790 struct ipcm_cookie *ipc, struct rtable *rt,
791 unsigned int flags)
792{
793 struct inet_sock *inet = inet_sk(sk);
794 struct sk_buff *skb;
795
796 struct ip_options *opt = NULL;
797 int hh_len;
798 int exthdrlen;
799 int mtu;
800 int copy;
801 int err;
802 int offset = 0;
803 unsigned int maxfraglen, fragheaderlen;
804 int csummode = CHECKSUM_NONE;
805
806 if (flags&MSG_PROBE)
807 return 0;
808
809 if (skb_queue_empty(&sk->sk_write_queue)) {
810
811
812
813 opt = ipc->opt;
814 if (opt) {
815 if (inet->cork.opt == NULL) {
816 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
817 if (unlikely(inet->cork.opt == NULL))
818 return -ENOBUFS;
819 }
820 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
821 inet->cork.flags |= IPCORK_OPT;
822 inet->cork.addr = ipc->addr;
823 }
824 dst_hold(&rt->u.dst);
825 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
826 rt->u.dst.dev->mtu :
827 dst_mtu(rt->u.dst.path);
828 inet->cork.rt = rt;
829 inet->cork.length = 0;
830 sk->sk_sndmsg_page = NULL;
831 sk->sk_sndmsg_off = 0;
832 if ((exthdrlen = rt->u.dst.header_len) != 0) {
833 length += exthdrlen;
834 transhdrlen += exthdrlen;
835 }
836 } else {
837 rt = inet->cork.rt;
838 if (inet->cork.flags & IPCORK_OPT)
839 opt = inet->cork.opt;
840
841 transhdrlen = 0;
842 exthdrlen = 0;
843 mtu = inet->cork.fragsize;
844 }
845 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
846
847 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
848 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
849
850 if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
851 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
852 return -EMSGSIZE;
853 }
854
855
856
857
858
859 if (transhdrlen &&
860 length + fragheaderlen <= mtu &&
861 rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
862 !exthdrlen)
863 csummode = CHECKSUM_PARTIAL;
864
865 inet->cork.length += length;
866 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
867 (rt->u.dst.dev->features & NETIF_F_UFO)) {
868
869 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
870 fragheaderlen, transhdrlen, mtu,
871 flags);
872 if (err)
873 goto error;
874 return 0;
875 }
876
877
878
879
880
881
882
883
884 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
885 goto alloc_new_skb;
886
887 while (length > 0) {
888
889 copy = mtu - skb->len;
890 if (copy < length)
891 copy = maxfraglen - skb->len;
892 if (copy <= 0) {
893 char *data;
894 unsigned int datalen;
895 unsigned int fraglen;
896 unsigned int fraggap;
897 unsigned int alloclen;
898 struct sk_buff *skb_prev;
899alloc_new_skb:
900 skb_prev = skb;
901 if (skb_prev)
902 fraggap = skb_prev->len - maxfraglen;
903 else
904 fraggap = 0;
905
906
907
908
909
910 datalen = length + fraggap;
911 if (datalen > mtu - fragheaderlen)
912 datalen = maxfraglen - fragheaderlen;
913 fraglen = datalen + fragheaderlen;
914
915 if ((flags & MSG_MORE) &&
916 !(rt->u.dst.dev->features&NETIF_F_SG))
917 alloclen = mtu;
918 else
919 alloclen = datalen + fragheaderlen;
920
921
922
923
924
925
926 if (datalen == length + fraggap)
927 alloclen += rt->u.dst.trailer_len;
928
929 if (transhdrlen) {
930 skb = sock_alloc_send_skb(sk,
931 alloclen + hh_len + 15,
932 (flags & MSG_DONTWAIT), &err);
933 } else {
934 skb = NULL;
935 if (atomic_read(&sk->sk_wmem_alloc) <=
936 2 * sk->sk_sndbuf)
937 skb = sock_wmalloc(sk,
938 alloclen + hh_len + 15, 1,
939 sk->sk_allocation);
940 if (unlikely(skb == NULL))
941 err = -ENOBUFS;
942 }
943 if (skb == NULL)
944 goto error;
945
946
947
948
949 skb->ip_summed = csummode;
950 skb->csum = 0;
951 skb_reserve(skb, hh_len);
952
953
954
955
956 data = skb_put(skb, fraglen);
957 skb_set_network_header(skb, exthdrlen);
958 skb->transport_header = (skb->network_header +
959 fragheaderlen);
960 data += fragheaderlen;
961
962 if (fraggap) {
963 skb->csum = skb_copy_and_csum_bits(
964 skb_prev, maxfraglen,
965 data + transhdrlen, fraggap, 0);
966 skb_prev->csum = csum_sub(skb_prev->csum,
967 skb->csum);
968 data += fraggap;
969 pskb_trim_unique(skb_prev, maxfraglen);
970 }
971
972 copy = datalen - transhdrlen - fraggap;
973 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
974 err = -EFAULT;
975 kfree_skb(skb);
976 goto error;
977 }
978
979 offset += copy;
980 length -= datalen - fraggap;
981 transhdrlen = 0;
982 exthdrlen = 0;
983 csummode = CHECKSUM_NONE;
984
985
986
987
988 __skb_queue_tail(&sk->sk_write_queue, skb);
989 continue;
990 }
991
992 if (copy > length)
993 copy = length;
994
995 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
996 unsigned int off;
997
998 off = skb->len;
999 if (getfrag(from, skb_put(skb, copy),
1000 offset, copy, off, skb) < 0) {
1001 __skb_trim(skb, off);
1002 err = -EFAULT;
1003 goto error;
1004 }
1005 } else {
1006 int i = skb_shinfo(skb)->nr_frags;
1007 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1008 struct page *page = sk->sk_sndmsg_page;
1009 int off = sk->sk_sndmsg_off;
1010 unsigned int left;
1011
1012 if (page && (left = PAGE_SIZE - off) > 0) {
1013 if (copy >= left)
1014 copy = left;
1015 if (page != frag->page) {
1016 if (i == MAX_SKB_FRAGS) {
1017 err = -EMSGSIZE;
1018 goto error;
1019 }
1020 get_page(page);
1021 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1022 frag = &skb_shinfo(skb)->frags[i];
1023 }
1024 } else if (i < MAX_SKB_FRAGS) {
1025 if (copy > PAGE_SIZE)
1026 copy = PAGE_SIZE;
1027 page = alloc_pages(sk->sk_allocation, 0);
1028 if (page == NULL) {
1029 err = -ENOMEM;
1030 goto error;
1031 }
1032 sk->sk_sndmsg_page = page;
1033 sk->sk_sndmsg_off = 0;
1034
1035 skb_fill_page_desc(skb, i, page, 0, 0);
1036 frag = &skb_shinfo(skb)->frags[i];
1037 } else {
1038 err = -EMSGSIZE;
1039 goto error;
1040 }
1041 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1042 err = -EFAULT;
1043 goto error;
1044 }
1045 sk->sk_sndmsg_off += copy;
1046 frag->size += copy;
1047 skb->len += copy;
1048 skb->data_len += copy;
1049 skb->truesize += copy;
1050 atomic_add(copy, &sk->sk_wmem_alloc);
1051 }
1052 offset += copy;
1053 length -= copy;
1054 }
1055
1056 return 0;
1057
1058error:
1059 inet->cork.length -= length;
1060 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1061 return err;
1062}
1063
1064ssize_t ip_append_page(struct sock *sk, struct page *page,
1065 int offset, size_t size, int flags)
1066{
1067 struct inet_sock *inet = inet_sk(sk);
1068 struct sk_buff *skb;
1069 struct rtable *rt;
1070 struct ip_options *opt = NULL;
1071 int hh_len;
1072 int mtu;
1073 int len;
1074 int err;
1075 unsigned int maxfraglen, fragheaderlen, fraggap;
1076
1077 if (inet->hdrincl)
1078 return -EPERM;
1079
1080 if (flags&MSG_PROBE)
1081 return 0;
1082
1083 if (skb_queue_empty(&sk->sk_write_queue))
1084 return -EINVAL;
1085
1086 rt = inet->cork.rt;
1087 if (inet->cork.flags & IPCORK_OPT)
1088 opt = inet->cork.opt;
1089
1090 if (!(rt->u.dst.dev->features&NETIF_F_SG))
1091 return -EOPNOTSUPP;
1092
1093 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1094 mtu = inet->cork.fragsize;
1095
1096 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1097 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1098
1099 if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1100 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1101 return -EMSGSIZE;
1102 }
1103
1104 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1105 return -EINVAL;
1106
1107 inet->cork.length += size;
1108 if ((sk->sk_protocol == IPPROTO_UDP) &&
1109 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1110 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1111 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1112 }
1113
1114
1115 while (size > 0) {
1116 int i;
1117
1118 if (skb_is_gso(skb))
1119 len = size;
1120 else {
1121
1122
1123 len = mtu - skb->len;
1124 if (len < size)
1125 len = maxfraglen - skb->len;
1126 }
1127 if (len <= 0) {
1128 struct sk_buff *skb_prev;
1129 int alloclen;
1130
1131 skb_prev = skb;
1132 fraggap = skb_prev->len - maxfraglen;
1133
1134 alloclen = fragheaderlen + hh_len + fraggap + 15;
1135 skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
1136 if (unlikely(!skb)) {
1137 err = -ENOBUFS;
1138 goto error;
1139 }
1140
1141
1142
1143
1144 skb->ip_summed = CHECKSUM_NONE;
1145 skb->csum = 0;
1146 skb_reserve(skb, hh_len);
1147
1148
1149
1150
1151 skb_put(skb, fragheaderlen + fraggap);
1152 skb_reset_network_header(skb);
1153 skb->transport_header = (skb->network_header +
1154 fragheaderlen);
1155 if (fraggap) {
1156 skb->csum = skb_copy_and_csum_bits(skb_prev,
1157 maxfraglen,
1158 skb_transport_header(skb),
1159 fraggap, 0);
1160 skb_prev->csum = csum_sub(skb_prev->csum,
1161 skb->csum);
1162 pskb_trim_unique(skb_prev, maxfraglen);
1163 }
1164
1165
1166
1167
1168 __skb_queue_tail(&sk->sk_write_queue, skb);
1169 continue;
1170 }
1171
1172 i = skb_shinfo(skb)->nr_frags;
1173 if (len > size)
1174 len = size;
1175 if (skb_can_coalesce(skb, i, page, offset)) {
1176 skb_shinfo(skb)->frags[i-1].size += len;
1177 } else if (i < MAX_SKB_FRAGS) {
1178 get_page(page);
1179 skb_fill_page_desc(skb, i, page, offset, len);
1180 } else {
1181 err = -EMSGSIZE;
1182 goto error;
1183 }
1184
1185 if (skb->ip_summed == CHECKSUM_NONE) {
1186 __wsum csum;
1187 csum = csum_page(page, offset, len);
1188 skb->csum = csum_block_add(skb->csum, csum, skb->len);
1189 }
1190
1191 skb->len += len;
1192 skb->data_len += len;
1193 skb->truesize += len;
1194 atomic_add(len, &sk->sk_wmem_alloc);
1195 offset += len;
1196 size -= len;
1197 }
1198 return 0;
1199
1200error:
1201 inet->cork.length -= size;
1202 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1203 return err;
1204}
1205
1206static void ip_cork_release(struct inet_sock *inet)
1207{
1208 inet->cork.flags &= ~IPCORK_OPT;
1209 kfree(inet->cork.opt);
1210 inet->cork.opt = NULL;
1211 if (inet->cork.rt) {
1212 ip_rt_put(inet->cork.rt);
1213 inet->cork.rt = NULL;
1214 }
1215}
1216
1217
1218
1219
1220
1221int ip_push_pending_frames(struct sock *sk)
1222{
1223 struct sk_buff *skb, *tmp_skb;
1224 struct sk_buff **tail_skb;
1225 struct inet_sock *inet = inet_sk(sk);
1226 struct ip_options *opt = NULL;
1227 struct rtable *rt = inet->cork.rt;
1228 struct iphdr *iph;
1229 __be16 df = 0;
1230 __u8 ttl;
1231 int err = 0;
1232
1233 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1234 goto out;
1235 tail_skb = &(skb_shinfo(skb)->frag_list);
1236
1237
1238 if (skb->data < skb_network_header(skb))
1239 __skb_pull(skb, skb_network_offset(skb));
1240 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1241 __skb_pull(tmp_skb, skb_network_header_len(skb));
1242 *tail_skb = tmp_skb;
1243 tail_skb = &(tmp_skb->next);
1244 skb->len += tmp_skb->len;
1245 skb->data_len += tmp_skb->len;
1246 skb->truesize += tmp_skb->truesize;
1247 __sock_put(tmp_skb->sk);
1248 tmp_skb->destructor = NULL;
1249 tmp_skb->sk = NULL;
1250 }
1251
1252
1253
1254
1255
1256 if (inet->pmtudisc < IP_PMTUDISC_DO)
1257 skb->local_df = 1;
1258
1259
1260
1261
1262 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1263 (skb->len <= dst_mtu(&rt->u.dst) &&
1264 ip_dont_fragment(sk, &rt->u.dst)))
1265 df = htons(IP_DF);
1266
1267 if (inet->cork.flags & IPCORK_OPT)
1268 opt = inet->cork.opt;
1269
1270 if (rt->rt_type == RTN_MULTICAST)
1271 ttl = inet->mc_ttl;
1272 else
1273 ttl = ip_select_ttl(inet, &rt->u.dst);
1274
1275 iph = (struct iphdr *)skb->data;
1276 iph->version = 4;
1277 iph->ihl = 5;
1278 if (opt) {
1279 iph->ihl += opt->optlen>>2;
1280 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1281 }
1282 iph->tos = inet->tos;
1283 iph->frag_off = df;
1284 ip_select_ident(iph, &rt->u.dst, sk);
1285 iph->ttl = ttl;
1286 iph->protocol = sk->sk_protocol;
1287 iph->saddr = rt->rt_src;
1288 iph->daddr = rt->rt_dst;
1289
1290 skb->priority = sk->sk_priority;
1291 skb->mark = sk->sk_mark;
1292 skb->dst = dst_clone(&rt->u.dst);
1293
1294 if (iph->protocol == IPPROTO_ICMP)
1295 icmp_out_count(((struct icmphdr *)
1296 skb_transport_header(skb))->type);
1297
1298
1299 err = ip_local_out(skb);
1300 if (err) {
1301 if (err > 0)
1302 err = inet->recverr ? net_xmit_errno(err) : 0;
1303 if (err)
1304 goto error;
1305 }
1306
1307out:
1308 ip_cork_release(inet);
1309 return err;
1310
1311error:
1312 IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1313 goto out;
1314}
1315
1316
1317
1318
1319void ip_flush_pending_frames(struct sock *sk)
1320{
1321 struct sk_buff *skb;
1322
1323 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1324 kfree_skb(skb);
1325
1326 ip_cork_release(inet_sk(sk));
1327}
1328
1329
1330
1331
1332
1333static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1334 int len, int odd, struct sk_buff *skb)
1335{
1336 __wsum csum;
1337
1338 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1339 skb->csum = csum_block_add(skb->csum, csum, odd);
1340 return 0;
1341}
1342
1343
1344
1345
1346
1347
1348
1349
1350void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1351 unsigned int len)
1352{
1353 struct inet_sock *inet = inet_sk(sk);
1354 struct {
1355 struct ip_options opt;
1356 char data[40];
1357 } replyopts;
1358 struct ipcm_cookie ipc;
1359 __be32 daddr;
1360 struct rtable *rt = (struct rtable*)skb->dst;
1361
1362 if (ip_options_echo(&replyopts.opt, skb))
1363 return;
1364
1365 daddr = ipc.addr = rt->rt_src;
1366 ipc.opt = NULL;
1367
1368 if (replyopts.opt.optlen) {
1369 ipc.opt = &replyopts.opt;
1370
1371 if (ipc.opt->srr)
1372 daddr = replyopts.opt.faddr;
1373 }
1374
1375 {
1376 struct flowi fl = { .oif = arg->bound_dev_if,
1377 .nl_u = { .ip4_u =
1378 { .daddr = daddr,
1379 .saddr = rt->rt_spec_dst,
1380 .tos = RT_TOS(ip_hdr(skb)->tos) } },
1381
1382 .uli_u = { .ports =
1383 { .sport = tcp_hdr(skb)->dest,
1384 .dport = tcp_hdr(skb)->source } },
1385 .proto = sk->sk_protocol };
1386 security_skb_classify_flow(skb, &fl);
1387 if (ip_route_output_key(sk->sk_net, &rt, &fl))
1388 return;
1389 }
1390
1391
1392
1393
1394
1395
1396
1397 bh_lock_sock(sk);
1398 inet->tos = ip_hdr(skb)->tos;
1399 sk->sk_priority = skb->priority;
1400 sk->sk_protocol = ip_hdr(skb)->protocol;
1401 sk->sk_bound_dev_if = arg->bound_dev_if;
1402 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1403 &ipc, rt, MSG_DONTWAIT);
1404 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1405 if (arg->csumoffset >= 0)
1406 *((__sum16 *)skb_transport_header(skb) +
1407 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1408 arg->csum));
1409 skb->ip_summed = CHECKSUM_NONE;
1410 ip_push_pending_frames(sk);
1411 }
1412
1413 bh_unlock_sock(sk);
1414
1415 ip_rt_put(rt);
1416}
1417
1418void __init ip_init(void)
1419{
1420 ip_rt_init();
1421 inet_initpeers();
1422
1423#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1424 igmp_mc_proc_init();
1425#endif
1426}
1427
1428EXPORT_SYMBOL(ip_generic_getfrag);
1429EXPORT_SYMBOL(ip_queue_xmit);
1430EXPORT_SYMBOL(ip_send_check);
1431