1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47#include <asm/uaccess.h>
48#include <asm/system.h>
49#include <linux/module.h>
50#include <linux/types.h>
51#include <linux/kernel.h>
52#include <linux/sched.h>
53#include <linux/mm.h>
54#include <linux/string.h>
55#include <linux/errno.h>
56#include <linux/config.h>
57
58#include <linux/socket.h>
59#include <linux/sockios.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/etherdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/stat.h>
66#include <linux/init.h>
67
68#include <net/snmp.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/udp.h>
74#include <linux/skbuff.h>
75#include <net/sock.h>
76#include <net/arp.h>
77#include <net/icmp.h>
78#include <net/raw.h>
79#include <net/checksum.h>
80#include <net/inetpeer.h>
81#include <linux/igmp.h>
82#include <linux/netfilter_ipv4.h>
83#include <linux/mroute.h>
84#include <linux/netlink.h>
85
86
87
88
89
90int sysctl_ip_dynaddr;
91int sysctl_ip_default_ttl = IPDEFTTL;
92
93
94__inline__ void ip_send_check(struct iphdr *iph)
95{
96 iph->check = 0;
97 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
98}
99
100
101static int ip_dev_loopback_xmit(struct sk_buff *newskb)
102{
103 newskb->mac.raw = newskb->data;
104 __skb_pull(newskb, newskb->nh.raw - newskb->data);
105 newskb->pkt_type = PACKET_LOOPBACK;
106 newskb->ip_summed = CHECKSUM_UNNECESSARY;
107 BUG_TRAP(newskb->dst);
108
109#ifdef CONFIG_NETFILTER_DEBUG
110 nf_debug_ip_loopback_xmit(newskb);
111#endif
112 netif_rx(newskb);
113 return 0;
114}
115
116static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst)
117{
118 int ttl = inet->uc_ttl;
119
120 if (ttl < 0)
121 ttl = dst_metric(dst, RTAX_HOPLIMIT);
122 return ttl;
123}
124
125
126
127
128
129int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
130 u32 saddr, u32 daddr, struct ip_options *opt)
131{
132 struct inet_opt *inet = inet_sk(sk);
133 struct rtable *rt = (struct rtable *)skb->dst;
134 struct iphdr *iph;
135
136
137 if (opt)
138 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
139 else
140 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
141
142 iph->version = 4;
143 iph->ihl = 5;
144 iph->tos = inet->tos;
145 if (ip_dont_fragment(sk, &rt->u.dst))
146 iph->frag_off = htons(IP_DF);
147 else
148 iph->frag_off = 0;
149 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
150 iph->daddr = rt->rt_dst;
151 iph->saddr = rt->rt_src;
152 iph->protocol = sk->sk_protocol;
153 iph->tot_len = htons(skb->len);
154 ip_select_ident(iph, &rt->u.dst, sk);
155 skb->nh.iph = iph;
156
157 if (opt && opt->optlen) {
158 iph->ihl += opt->optlen>>2;
159 ip_options_build(skb, opt, daddr, rt, 0);
160 }
161 ip_send_check(iph);
162
163 skb->priority = sk->sk_priority;
164
165
166 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
167 dst_output);
168}
169
170static inline int ip_finish_output2(struct sk_buff *skb)
171{
172 struct dst_entry *dst = skb->dst;
173 struct hh_cache *hh = dst->hh;
174 struct net_device *dev = dst->dev;
175 int hh_len = LL_RESERVED_SPACE(dev);
176
177
178 if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
179 struct sk_buff *skb2;
180
181 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
182 if (skb2 == NULL) {
183 kfree_skb(skb);
184 return -ENOMEM;
185 }
186 if (skb->sk)
187 skb_set_owner_w(skb2, skb->sk);
188 kfree_skb(skb);
189 skb = skb2;
190 }
191
192#ifdef CONFIG_NETFILTER_DEBUG
193 nf_debug_ip_finish_output2(skb);
194#endif
195
196 if (hh) {
197 int hh_alen;
198
199 read_lock_bh(&hh->hh_lock);
200 hh_alen = HH_DATA_ALIGN(hh->hh_len);
201 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
202 read_unlock_bh(&hh->hh_lock);
203 skb_push(skb, hh->hh_len);
204 return hh->hh_output(skb);
205 } else if (dst->neighbour)
206 return dst->neighbour->output(skb);
207
208 if (net_ratelimit())
209 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
210 kfree_skb(skb);
211 return -EINVAL;
212}
213
214int ip_finish_output(struct sk_buff *skb)
215{
216 struct net_device *dev = skb->dst->dev;
217
218 skb->dev = dev;
219 skb->protocol = htons(ETH_P_IP);
220
221 return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev,
222 ip_finish_output2);
223}
224
225int ip_mc_output(struct sk_buff *skb)
226{
227 struct sock *sk = skb->sk;
228 struct rtable *rt = (struct rtable*)skb->dst;
229 struct net_device *dev = rt->u.dst.dev;
230
231
232
233
234 IP_INC_STATS(IpOutRequests);
235
236 skb->dev = dev;
237 skb->protocol = htons(ETH_P_IP);
238
239
240
241
242
243 if (rt->rt_flags&RTCF_MULTICAST) {
244 if ((!sk || inet_sk(sk)->mc_loop)
245#ifdef CONFIG_IP_MROUTE
246
247
248
249
250
251
252
253
254 && ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
255#endif
256 ) {
257 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
258 if (newskb)
259 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
260 newskb->dev,
261 ip_dev_loopback_xmit);
262 }
263
264
265
266 if (skb->nh.iph->ttl == 0) {
267 kfree_skb(skb);
268 return 0;
269 }
270 }
271
272 if (rt->rt_flags&RTCF_BROADCAST) {
273 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
274 if (newskb)
275 NF_HOOK(PF_INET, NF_IP_POST_ROUTING, newskb, NULL,
276 newskb->dev, ip_dev_loopback_xmit);
277 }
278
279 if (skb->len > dst_pmtu(&rt->u.dst) || skb_shinfo(skb)->frag_list)
280 return ip_fragment(skb, ip_finish_output);
281 else
282 return ip_finish_output(skb);
283}
284
285int ip_output(struct sk_buff *skb)
286{
287 IP_INC_STATS(IpOutRequests);
288
289 if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) &&
290 !skb_shinfo(skb)->tso_size)
291 return ip_fragment(skb, ip_finish_output);
292 else
293 return ip_finish_output(skb);
294}
295
296int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
297{
298 struct sock *sk = skb->sk;
299 struct inet_opt *inet = inet_sk(sk);
300 struct ip_options *opt = inet->opt;
301 struct rtable *rt;
302 struct iphdr *iph;
303 u32 mtu;
304
305
306
307
308 rt = (struct rtable *) skb->dst;
309 if (rt != NULL)
310 goto packet_routed;
311
312
313 rt = (struct rtable *)__sk_dst_check(sk, 0);
314 if (rt == NULL) {
315 u32 daddr;
316
317
318 daddr = inet->daddr;
319 if(opt && opt->srr)
320 daddr = opt->faddr;
321
322 {
323 struct flowi fl = { .oif = sk->sk_bound_dev_if,
324 .nl_u = { .ip4_u =
325 { .daddr = daddr,
326 .saddr = inet->saddr,
327 .tos = RT_CONN_FLAGS(sk) } },
328 .proto = sk->sk_protocol,
329 .uli_u = { .ports =
330 { .sport = inet->sport,
331 .dport = inet->dport } } };
332
333
334
335
336
337 if (ip_route_output_flow(&rt, &fl, sk, 0))
338 goto no_route;
339 }
340 __sk_dst_set(sk, &rt->u.dst);
341 tcp_v4_setup_caps(sk, &rt->u.dst);
342 }
343 skb->dst = dst_clone(&rt->u.dst);
344
345packet_routed:
346 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
347 goto no_route;
348
349
350 iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
351 *((__u16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
352 iph->tot_len = htons(skb->len);
353 if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
354 iph->frag_off = htons(IP_DF);
355 else
356 iph->frag_off = 0;
357 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
358 iph->protocol = sk->sk_protocol;
359 iph->saddr = rt->rt_src;
360 iph->daddr = rt->rt_dst;
361 skb->nh.iph = iph;
362
363
364 if(opt && opt->optlen) {
365 iph->ihl += opt->optlen >> 2;
366 ip_options_build(skb, opt, inet->daddr, rt, 0);
367 }
368
369 mtu = dst_pmtu(&rt->u.dst);
370 if (skb->len > mtu && (sk->sk_route_caps & NETIF_F_TSO)) {
371 unsigned int hlen;
372
373
374 hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
375 skb_shinfo(skb)->tso_size = mtu - hlen;
376 skb_shinfo(skb)->tso_segs =
377 (skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
378 skb_shinfo(skb)->tso_size - 1;
379 }
380
381 ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
382
383
384 ip_send_check(iph);
385
386 skb->priority = sk->sk_priority;
387
388 return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
389 dst_output);
390
391no_route:
392 IP_INC_STATS(IpOutNoRoutes);
393 kfree_skb(skb);
394 return -EHOSTUNREACH;
395}
396
397
398static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
399{
400 to->pkt_type = from->pkt_type;
401 to->priority = from->priority;
402 to->protocol = from->protocol;
403 to->security = from->security;
404 to->dst = dst_clone(from->dst);
405 to->dev = from->dev;
406
407
408 IPCB(to)->flags = IPCB(from)->flags;
409
410#ifdef CONFIG_NET_SCHED
411 to->tc_index = from->tc_index;
412#endif
413#ifdef CONFIG_NETFILTER
414 to->nfmark = from->nfmark;
415 to->nfcache = from->nfcache;
416
417 to->nfct = from->nfct;
418 nf_conntrack_get(to->nfct);
419#ifdef CONFIG_BRIDGE_NETFILTER
420 nf_bridge_put(to->nf_bridge);
421 to->nf_bridge = from->nf_bridge;
422 nf_bridge_get(to->nf_bridge);
423#endif
424#ifdef CONFIG_NETFILTER_DEBUG
425 to->nf_debug = from->nf_debug;
426#endif
427#endif
428}
429
430
431
432
433
434
435
436
437int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
438{
439 struct iphdr *iph;
440 int raw = 0;
441 int ptr;
442 struct net_device *dev;
443 struct sk_buff *skb2;
444 unsigned int mtu, hlen, left, len;
445 int offset;
446 int not_last_frag;
447 struct rtable *rt = (struct rtable*)skb->dst;
448 int err = 0;
449
450 dev = rt->u.dst.dev;
451
452
453
454
455
456 iph = skb->nh.iph;
457
458 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
459 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
460 htonl(dst_pmtu(&rt->u.dst)));
461 kfree_skb(skb);
462 return -EMSGSIZE;
463 }
464
465
466
467
468
469 hlen = iph->ihl * 4;
470 mtu = dst_pmtu(&rt->u.dst) - hlen;
471
472
473
474
475
476
477
478
479 if (skb_shinfo(skb)->frag_list) {
480 struct sk_buff *frag;
481 int first_len = skb_pagelen(skb);
482
483 if (first_len - hlen > mtu ||
484 ((first_len - hlen) & 7) ||
485 (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
486 skb_cloned(skb))
487 goto slow_path;
488
489 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
490
491 if (frag->len > mtu ||
492 ((frag->len & 7) && frag->next) ||
493 skb_headroom(frag) < hlen)
494 goto slow_path;
495
496
497 if (frag->sk == NULL && skb->sk)
498 goto slow_path;
499
500
501 if (skb_shared(frag))
502 goto slow_path;
503 }
504
505
506
507 err = 0;
508 offset = 0;
509 frag = skb_shinfo(skb)->frag_list;
510 skb_shinfo(skb)->frag_list = 0;
511 skb->data_len = first_len - skb_headlen(skb);
512 skb->len = first_len;
513 iph->tot_len = htons(first_len);
514 iph->frag_off |= htons(IP_MF);
515 ip_send_check(iph);
516
517 for (;;) {
518
519
520 if (frag) {
521 frag->h.raw = frag->data;
522 frag->nh.raw = __skb_push(frag, hlen);
523 memcpy(frag->nh.raw, iph, hlen);
524 iph = frag->nh.iph;
525 iph->tot_len = htons(frag->len);
526 ip_copy_metadata(frag, skb);
527 if (offset == 0)
528 ip_options_fragment(frag);
529 offset += skb->len - hlen;
530 iph->frag_off = htons(offset>>3);
531 if (frag->next != NULL)
532 iph->frag_off |= htons(IP_MF);
533
534 ip_send_check(iph);
535 }
536
537 err = output(skb);
538
539 if (err || !frag)
540 break;
541
542 skb = frag;
543 frag = skb->next;
544 skb->next = NULL;
545 }
546
547 if (err == 0) {
548 IP_INC_STATS(IpFragOKs);
549 return 0;
550 }
551
552 while (frag) {
553 skb = frag->next;
554 kfree_skb(frag);
555 frag = skb;
556 }
557 IP_INC_STATS(IpFragFails);
558 return err;
559 }
560
561slow_path:
562 left = skb->len - hlen;
563 ptr = raw + hlen;
564
565
566
567
568
569 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
570 not_last_frag = iph->frag_off & htons(IP_MF);
571
572
573
574
575
576 while(left > 0) {
577 len = left;
578
579 if (len > mtu)
580 len = mtu;
581
582
583 if (len < left) {
584 len &= ~7;
585 }
586
587
588
589
590 if ((skb2 = alloc_skb(len+hlen+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
591 NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
592 err = -ENOMEM;
593 goto fail;
594 }
595
596
597
598
599
600 ip_copy_metadata(skb2, skb);
601 skb_reserve(skb2, LL_RESERVED_SPACE(rt->u.dst.dev));
602 skb_put(skb2, len + hlen);
603 skb2->nh.raw = skb2->data;
604 skb2->h.raw = skb2->data + hlen;
605
606
607
608
609
610
611 if (skb->sk)
612 skb_set_owner_w(skb2, skb->sk);
613
614
615
616
617
618 memcpy(skb2->nh.raw, skb->data, hlen);
619
620
621
622
623 if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
624 BUG();
625 left -= len;
626
627
628
629
630 iph = skb2->nh.iph;
631 iph->frag_off = htons((offset >> 3));
632
633
634
635
636
637
638
639 if (offset == 0)
640 ip_options_fragment(skb);
641
642
643
644
645
646 if (left > 0 || not_last_frag)
647 iph->frag_off |= htons(IP_MF);
648 ptr += len;
649 offset += len;
650
651
652
653
654
655 IP_INC_STATS(IpFragCreates);
656
657 iph->tot_len = htons(len + hlen);
658
659 ip_send_check(iph);
660
661 err = output(skb2);
662 if (err)
663 goto fail;
664 }
665 kfree_skb(skb);
666 IP_INC_STATS(IpFragOKs);
667 return err;
668
669fail:
670 kfree_skb(skb);
671 IP_INC_STATS(IpFragFails);
672 return err;
673}
674
675int
676ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
677{
678 struct iovec *iov = from;
679
680 if (skb->ip_summed == CHECKSUM_HW) {
681 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
682 return -EFAULT;
683 } else {
684 unsigned int csum = 0;
685 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
686 return -EFAULT;
687 skb->csum = csum_block_add(skb->csum, csum, odd);
688 }
689 return 0;
690}
691
692static inline int
693skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
694{
695 if (i) {
696 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
697 return page == frag->page &&
698 off == frag->page_offset+frag->size;
699 }
700 return 0;
701}
702
/*
 * Checksum @copy bytes of @page starting at @offset.  The page is mapped
 * with kmap() for the duration of the computation.
 */
static inline unsigned int
csum_page(struct page *page, int offset, int copy)
{
	unsigned int sum;
	char *base;

	base = kmap(page);
	sum = csum_partial(base + offset, copy, 0);
	kunmap(page);

	return sum;
}
713
714
715
716
717
718
719
720
721
722
723
724
725int ip_append_data(struct sock *sk,
726 int getfrag(void *from, char *to, int offset, int len,
727 int odd, struct sk_buff *skb),
728 void *from, int length, int transhdrlen,
729 struct ipcm_cookie *ipc, struct rtable *rt,
730 unsigned int flags)
731{
732 struct inet_opt *inet = inet_sk(sk);
733 struct sk_buff *skb;
734
735 struct ip_options *opt = NULL;
736 int hh_len;
737 int exthdrlen;
738 int mtu;
739 int copy;
740 int err;
741 int offset = 0;
742 unsigned int maxfraglen, fragheaderlen;
743 int csummode = CHECKSUM_NONE;
744
745 if (flags&MSG_PROBE)
746 return 0;
747
748 if (skb_queue_empty(&sk->sk_write_queue)) {
749
750
751
752 opt = ipc->opt;
753 if (opt) {
754 if (inet->cork.opt == NULL)
755 inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
756 memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
757 inet->cork.flags |= IPCORK_OPT;
758 inet->cork.addr = ipc->addr;
759 }
760 dst_hold(&rt->u.dst);
761 inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
762 inet->cork.rt = rt;
763 inet->cork.length = 0;
764 inet->sndmsg_page = NULL;
765 inet->sndmsg_off = 0;
766 if ((exthdrlen = rt->u.dst.header_len) != 0) {
767 length += exthdrlen;
768 transhdrlen += exthdrlen;
769 }
770 } else {
771 rt = inet->cork.rt;
772 if (inet->cork.flags & IPCORK_OPT)
773 opt = inet->cork.opt;
774
775 transhdrlen = 0;
776 exthdrlen = 0;
777 mtu = inet->cork.fragsize;
778 }
779 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
780
781 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
782 maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
783
784 if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
785 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu-exthdrlen);
786 return -EMSGSIZE;
787 }
788
789
790
791
792
793 if (transhdrlen &&
794 length + fragheaderlen <= maxfraglen &&
795 rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
796 !exthdrlen)
797 csummode = CHECKSUM_HW;
798
799 inet->cork.length += length;
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
815 goto alloc_new_skb;
816
817 while (length > 0) {
818 if ((copy = maxfraglen - skb->len) <= 0) {
819 char *data;
820 unsigned int datalen;
821 unsigned int fraglen;
822 unsigned int alloclen;
823 BUG_TRAP(copy == 0);
824
825alloc_new_skb:
826 datalen = maxfraglen - fragheaderlen;
827 if (datalen > length)
828 datalen = length;
829
830 fraglen = datalen + fragheaderlen;
831 if ((flags & MSG_MORE) &&
832 !(rt->u.dst.dev->features&NETIF_F_SG))
833 alloclen = maxfraglen;
834 else
835 alloclen = datalen + fragheaderlen;
836
837
838
839
840
841
842 if (datalen == length)
843 alloclen += rt->u.dst.trailer_len;
844
845 if (transhdrlen) {
846 skb = sock_alloc_send_skb(sk,
847 alloclen + hh_len + 15,
848 (flags & MSG_DONTWAIT), &err);
849 } else {
850 skb = NULL;
851 if (atomic_read(&sk->sk_wmem_alloc) <=
852 2 * sk->sk_sndbuf)
853 skb = sock_wmalloc(sk,
854 alloclen + hh_len + 15, 1,
855 sk->sk_allocation);
856 if (unlikely(skb == NULL))
857 err = -ENOBUFS;
858 }
859 if (skb == NULL)
860 goto error;
861
862
863
864
865 skb->ip_summed = csummode;
866 skb->csum = 0;
867 skb_reserve(skb, hh_len);
868
869
870
871
872 data = skb_put(skb, fraglen);
873 skb->nh.raw = data + exthdrlen;
874 data += fragheaderlen;
875 skb->h.raw = data + exthdrlen;
876
877 copy = datalen - transhdrlen;
878 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
879 err = -EFAULT;
880 kfree_skb(skb);
881 goto error;
882 }
883
884 offset += copy;
885 length -= datalen;
886 transhdrlen = 0;
887 exthdrlen = 0;
888 csummode = CHECKSUM_NONE;
889
890
891
892
893 __skb_queue_tail(&sk->sk_write_queue, skb);
894 continue;
895 }
896
897 if (copy > length)
898 copy = length;
899
900 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
901 unsigned int off;
902
903 off = skb->len;
904 if (getfrag(from, skb_put(skb, copy),
905 offset, copy, off, skb) < 0) {
906 __skb_trim(skb, off);
907 err = -EFAULT;
908 goto error;
909 }
910 } else {
911 int i = skb_shinfo(skb)->nr_frags;
912 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
913 struct page *page = inet->sndmsg_page;
914 int off = inet->sndmsg_off;
915 unsigned int left;
916
917 if (page && (left = PAGE_SIZE - off) > 0) {
918 if (copy >= left)
919 copy = left;
920 if (page != frag->page) {
921 if (i == MAX_SKB_FRAGS) {
922 err = -EMSGSIZE;
923 goto error;
924 }
925 get_page(page);
926 skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
927 frag = &skb_shinfo(skb)->frags[i];
928 }
929 } else if (i < MAX_SKB_FRAGS) {
930 if (copy > PAGE_SIZE)
931 copy = PAGE_SIZE;
932 page = alloc_pages(sk->sk_allocation, 0);
933 if (page == NULL) {
934 err = -ENOMEM;
935 goto error;
936 }
937 inet->sndmsg_page = page;
938 inet->sndmsg_off = 0;
939
940 skb_fill_page_desc(skb, i, page, 0, 0);
941 frag = &skb_shinfo(skb)->frags[i];
942 skb->truesize += PAGE_SIZE;
943 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
944 } else {
945 err = -EMSGSIZE;
946 goto error;
947 }
948 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
949 err = -EFAULT;
950 goto error;
951 }
952 inet->sndmsg_off += copy;
953 frag->size += copy;
954 skb->len += copy;
955 skb->data_len += copy;
956 }
957 offset += copy;
958 length -= copy;
959 }
960
961 return 0;
962
963error:
964 inet->cork.length -= length;
965 IP_INC_STATS(IpOutDiscards);
966 return err;
967}
968
969ssize_t ip_append_page(struct sock *sk, struct page *page,
970 int offset, size_t size, int flags)
971{
972 struct inet_opt *inet = inet_sk(sk);
973 struct sk_buff *skb;
974 struct rtable *rt;
975 struct ip_options *opt = NULL;
976 int hh_len;
977 int mtu;
978 int len;
979 int err;
980 unsigned int maxfraglen, fragheaderlen;
981
982 if (inet->hdrincl)
983 return -EPERM;
984
985 if (flags&MSG_PROBE)
986 return 0;
987
988 if (skb_queue_empty(&sk->sk_write_queue))
989 return -EINVAL;
990
991 rt = inet->cork.rt;
992 if (inet->cork.flags & IPCORK_OPT)
993 opt = inet->cork.opt;
994
995 if (!(rt->u.dst.dev->features&NETIF_F_SG))
996 return -EOPNOTSUPP;
997
998 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
999 mtu = inet->cork.fragsize;
1000
1001 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
1002 maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
1003
1004 if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
1005 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport, mtu);
1006 return -EMSGSIZE;
1007 }
1008
1009 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1010 return -EINVAL;
1011
1012 inet->cork.length += size;
1013
1014 while (size > 0) {
1015 int i;
1016 if ((len = maxfraglen - skb->len) <= 0) {
1017 char *data;
1018 struct iphdr *iph;
1019 BUG_TRAP(len == 0);
1020
1021 skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1,
1022 sk->sk_allocation);
1023 if (unlikely(!skb)) {
1024 err = -ENOBUFS;
1025 goto error;
1026 }
1027
1028
1029
1030
1031 skb->ip_summed = CHECKSUM_NONE;
1032 skb->csum = 0;
1033 skb_reserve(skb, hh_len);
1034
1035
1036
1037
1038 data = skb_put(skb, fragheaderlen);
1039 skb->nh.iph = iph = (struct iphdr *)data;
1040 data += fragheaderlen;
1041 skb->h.raw = data;
1042
1043
1044
1045
1046 __skb_queue_tail(&sk->sk_write_queue, skb);
1047 continue;
1048 }
1049
1050 i = skb_shinfo(skb)->nr_frags;
1051 if (len > size)
1052 len = size;
1053 if (skb_can_coalesce(skb, i, page, offset)) {
1054 skb_shinfo(skb)->frags[i-1].size += len;
1055 } else if (i < MAX_SKB_FRAGS) {
1056 get_page(page);
1057 skb_fill_page_desc(skb, i, page, offset, len);
1058 } else {
1059 err = -EMSGSIZE;
1060 goto error;
1061 }
1062
1063 if (skb->ip_summed == CHECKSUM_NONE) {
1064 unsigned int csum;
1065 csum = csum_page(page, offset, len);
1066 skb->csum = csum_block_add(skb->csum, csum, skb->len);
1067 }
1068
1069 skb->len += len;
1070 skb->data_len += len;
1071 offset += len;
1072 size -= len;
1073 }
1074 return 0;
1075
1076error:
1077 inet->cork.length -= size;
1078 IP_INC_STATS(IpOutDiscards);
1079 return err;
1080}
1081
1082
1083
1084
1085
1086int ip_push_pending_frames(struct sock *sk)
1087{
1088 struct sk_buff *skb, *tmp_skb;
1089 struct sk_buff **tail_skb;
1090 struct inet_opt *inet = inet_sk(sk);
1091 struct ip_options *opt = NULL;
1092 struct rtable *rt = inet->cork.rt;
1093 struct iphdr *iph;
1094 int df = 0;
1095 __u8 ttl;
1096 int err = 0;
1097
1098 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1099 goto out;
1100 tail_skb = &(skb_shinfo(skb)->frag_list);
1101
1102
1103 if (skb->data < skb->nh.raw)
1104 __skb_pull(skb, skb->nh.raw - skb->data);
1105 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1106 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1107 *tail_skb = tmp_skb;
1108 tail_skb = &(tmp_skb->next);
1109 skb->len += tmp_skb->len;
1110 skb->data_len += tmp_skb->len;
1111#if 0
1112 skb->truesize += tmp_skb->truesize;
1113 __sock_put(tmp_skb->sk);
1114 tmp_skb->destructor = NULL;
1115 tmp_skb->sk = NULL;
1116#endif
1117 }
1118
1119
1120
1121
1122
1123 if (inet->pmtudisc != IP_PMTUDISC_DO)
1124 skb->local_df = 1;
1125
1126
1127
1128
1129 if (inet->pmtudisc == IP_PMTUDISC_DO ||
1130 (!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst)))
1131 df = htons(IP_DF);
1132
1133 if (inet->cork.flags & IPCORK_OPT)
1134 opt = inet->cork.opt;
1135
1136 if (rt->rt_type == RTN_MULTICAST)
1137 ttl = inet->mc_ttl;
1138 else
1139 ttl = ip_select_ttl(inet, &rt->u.dst);
1140
1141 iph = (struct iphdr *)skb->data;
1142 iph->version = 4;
1143 iph->ihl = 5;
1144 if (opt) {
1145 iph->ihl += opt->optlen>>2;
1146 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1147 }
1148 iph->tos = inet->tos;
1149 iph->tot_len = htons(skb->len);
1150 iph->frag_off = df;
1151 if (!df) {
1152 __ip_select_ident(iph, &rt->u.dst, 0);
1153 } else {
1154 iph->id = htons(inet->id++);
1155 }
1156 iph->ttl = ttl;
1157 iph->protocol = sk->sk_protocol;
1158 iph->saddr = rt->rt_src;
1159 iph->daddr = rt->rt_dst;
1160 ip_send_check(iph);
1161
1162 skb->priority = sk->sk_priority;
1163 skb->dst = dst_clone(&rt->u.dst);
1164
1165
1166 err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
1167 skb->dst->dev, dst_output);
1168 if (err) {
1169 if (err > 0)
1170 err = inet->recverr ? net_xmit_errno(err) : 0;
1171 if (err)
1172 goto error;
1173 }
1174
1175out:
1176 inet->cork.flags &= ~IPCORK_OPT;
1177 if (inet->cork.rt) {
1178 ip_rt_put(inet->cork.rt);
1179 inet->cork.rt = NULL;
1180 }
1181 return err;
1182
1183error:
1184 IP_INC_STATS(IpOutDiscards);
1185 goto out;
1186}
1187
1188
1189
1190
1191void ip_flush_pending_frames(struct sock *sk)
1192{
1193 struct inet_opt *inet = inet_sk(sk);
1194 struct sk_buff *skb;
1195
1196 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1197 kfree_skb(skb);
1198
1199 inet->cork.flags &= ~IPCORK_OPT;
1200 if (inet->cork.opt) {
1201 kfree(inet->cork.opt);
1202 inet->cork.opt = NULL;
1203 }
1204 if (inet->cork.rt) {
1205 ip_rt_put(inet->cork.rt);
1206 inet->cork.rt = NULL;
1207 }
1208}
1209
1210
1211
1212
1213
1214static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1215 int len, int odd, struct sk_buff *skb)
1216{
1217 unsigned int csum;
1218
1219 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1220 skb->csum = csum_block_add(skb->csum, csum, odd);
1221 return 0;
1222}
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1234 unsigned int len)
1235{
1236 struct inet_opt *inet = inet_sk(sk);
1237 struct {
1238 struct ip_options opt;
1239 char data[40];
1240 } replyopts;
1241 struct ipcm_cookie ipc;
1242 u32 daddr;
1243 struct rtable *rt = (struct rtable*)skb->dst;
1244
1245 if (ip_options_echo(&replyopts.opt, skb))
1246 return;
1247
1248 daddr = ipc.addr = rt->rt_src;
1249 ipc.opt = NULL;
1250
1251 if (replyopts.opt.optlen) {
1252 ipc.opt = &replyopts.opt;
1253
1254 if (ipc.opt->srr)
1255 daddr = replyopts.opt.faddr;
1256 }
1257
1258 {
1259 struct flowi fl = { .nl_u = { .ip4_u =
1260 { .daddr = daddr,
1261 .saddr = rt->rt_spec_dst,
1262 .tos = RT_TOS(skb->nh.iph->tos) } },
1263
1264 .uli_u = { .ports =
1265 { .sport = skb->h.th->dest,
1266 .dport = skb->h.th->source } },
1267 .proto = sk->sk_protocol };
1268 if (ip_route_output_key(&rt, &fl))
1269 return;
1270 }
1271
1272
1273
1274
1275
1276
1277
1278 bh_lock_sock(sk);
1279 inet->tos = skb->nh.iph->tos;
1280 sk->sk_priority = skb->priority;
1281 sk->sk_protocol = skb->nh.iph->protocol;
1282 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1283 &ipc, rt, MSG_DONTWAIT);
1284 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1285 if (arg->csumoffset >= 0)
1286 *((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
1287 skb->ip_summed = CHECKSUM_NONE;
1288 ip_push_pending_frames(sk);
1289 }
1290
1291 bh_unlock_sock(sk);
1292
1293 ip_rt_put(rt);
1294}
1295
1296
1297
1298
1299
1300static struct packet_type ip_packet_type = {
1301 .type = __constant_htons(ETH_P_IP),
1302 .func = ip_rcv,
1303};
1304
1305
1306
1307
1308
1309void __init ip_init(void)
1310{
1311 dev_add_pack(&ip_packet_type);
1312
1313 ip_rt_init();
1314 inet_initpeers();
1315
1316#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1317 igmp_mc_proc_init();
1318#endif
1319}
1320
/* Tunable exported only when sysctl support is configured. */
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(sysctl_ip_default_ttl);
#endif

/* Output-path entry points exported from this file. */
EXPORT_SYMBOL(ip_send_check);
EXPORT_SYMBOL(ip_queue_xmit);
EXPORT_SYMBOL(ip_generic_getfrag);
EXPORT_SYMBOL(ip_fragment);
EXPORT_SYMBOL(ip_finish_output);
1330