1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45#include <asm/uaccess.h>
46#include <asm/system.h>
47#include <linux/module.h>
48#include <linux/types.h>
49#include <linux/kernel.h>
50#include <linux/mm.h>
51#include <linux/string.h>
52#include <linux/errno.h>
53#include <linux/highmem.h>
54#include <linux/slab.h>
55
56#include <linux/socket.h>
57#include <linux/sockios.h>
58#include <linux/in.h>
59#include <linux/inet.h>
60#include <linux/netdevice.h>
61#include <linux/etherdevice.h>
62#include <linux/proc_fs.h>
63#include <linux/stat.h>
64#include <linux/init.h>
65
66#include <net/snmp.h>
67#include <net/ip.h>
68#include <net/protocol.h>
69#include <net/route.h>
70#include <net/xfrm.h>
71#include <linux/skbuff.h>
72#include <net/sock.h>
73#include <net/arp.h>
74#include <net/icmp.h>
75#include <net/checksum.h>
76#include <net/inetpeer.h>
77#include <linux/igmp.h>
78#include <linux/netfilter_ipv4.h>
79#include <linux/netfilter_bridge.h>
80#include <linux/mroute.h>
81#include <linux/netlink.h>
82#include <linux/tcp.h>
83
/*
 * Default IP time-to-live, initialised to IPDEFTTL.
 * NOTE(review): the name suggests this is tunable through sysctl; the
 * registration is not in this file - confirm.
 */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
85
86
87__inline__ void ip_send_check(struct iphdr *iph)
88{
89 iph->check = 0;
90 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
91}
92
93int __ip_local_out(struct sk_buff *skb)
94{
95 struct iphdr *iph = ip_hdr(skb);
96
97 iph->tot_len = htons(skb->len);
98 ip_send_check(iph);
99 return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
100 skb_dst(skb)->dev, dst_output);
101}
102
/*
 * Send a locally generated packet: finalise the header and run the
 * LOCAL_OUT hook via __ip_local_out(); when that returns 1, hand the
 * packet to dst_output(). Any other value is returned unchanged.
 */
int ip_local_out(struct sk_buff *skb)
{
	int err = __ip_local_out(skb);

	if (likely(err == 1))
		return dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip_local_out);
114
115
116static int ip_dev_loopback_xmit(struct sk_buff *newskb)
117{
118 skb_reset_mac_header(newskb);
119 __skb_pull(newskb, skb_network_offset(newskb));
120 newskb->pkt_type = PACKET_LOOPBACK;
121 newskb->ip_summed = CHECKSUM_UNNECESSARY;
122 WARN_ON(!skb_dst(newskb));
123 netif_rx_ni(newskb);
124 return 0;
125}
126
127static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
128{
129 int ttl = inet->uc_ttl;
130
131 if (ttl < 0)
132 ttl = dst_metric(dst, RTAX_HOPLIMIT);
133 return ttl;
134}
135
136
137
138
139
140int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
141 __be32 saddr, __be32 daddr, struct ip_options *opt)
142{
143 struct inet_sock *inet = inet_sk(sk);
144 struct rtable *rt = skb_rtable(skb);
145 struct iphdr *iph;
146
147
148 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
149 skb_reset_network_header(skb);
150 iph = ip_hdr(skb);
151 iph->version = 4;
152 iph->ihl = 5;
153 iph->tos = inet->tos;
154 if (ip_dont_fragment(sk, &rt->u.dst))
155 iph->frag_off = htons(IP_DF);
156 else
157 iph->frag_off = 0;
158 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
159 iph->daddr = rt->rt_dst;
160 iph->saddr = rt->rt_src;
161 iph->protocol = sk->sk_protocol;
162 ip_select_ident(iph, &rt->u.dst, sk);
163
164 if (opt && opt->optlen) {
165 iph->ihl += opt->optlen>>2;
166 ip_options_build(skb, opt, daddr, rt, 0);
167 }
168
169 skb->priority = sk->sk_priority;
170 skb->mark = sk->sk_mark;
171
172
173 return ip_local_out(skb);
174}
175
176EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
177
178static inline int ip_finish_output2(struct sk_buff *skb)
179{
180 struct dst_entry *dst = skb_dst(skb);
181 struct rtable *rt = (struct rtable *)dst;
182 struct net_device *dev = dst->dev;
183 unsigned int hh_len = LL_RESERVED_SPACE(dev);
184
185 if (rt->rt_type == RTN_MULTICAST) {
186 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
187 } else if (rt->rt_type == RTN_BROADCAST)
188 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
189
190
191 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
192 struct sk_buff *skb2;
193
194 skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
195 if (skb2 == NULL) {
196 kfree_skb(skb);
197 return -ENOMEM;
198 }
199 if (skb->sk)
200 skb_set_owner_w(skb2, skb->sk);
201 kfree_skb(skb);
202 skb = skb2;
203 }
204
205 if (dst->hh)
206 return neigh_hh_output(dst->hh, skb);
207 else if (dst->neighbour)
208 return dst->neighbour->output(skb);
209
210 if (net_ratelimit())
211 printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n");
212 kfree_skb(skb);
213 return -EINVAL;
214}
215
216static inline int ip_skb_dst_mtu(struct sk_buff *skb)
217{
218 struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
219
220 return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
221 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
222}
223
224static int ip_finish_output(struct sk_buff *skb)
225{
226#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
227
228 if (skb_dst(skb)->xfrm != NULL) {
229 IPCB(skb)->flags |= IPSKB_REROUTED;
230 return dst_output(skb);
231 }
232#endif
233 if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
234 return ip_fragment(skb, ip_finish_output2);
235 else
236 return ip_finish_output2(skb);
237}
238
239int ip_mc_output(struct sk_buff *skb)
240{
241 struct sock *sk = skb->sk;
242 struct rtable *rt = skb_rtable(skb);
243 struct net_device *dev = rt->u.dst.dev;
244
245
246
247
248 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
249
250 skb->dev = dev;
251 skb->protocol = htons(ETH_P_IP);
252
253
254
255
256
257 if (rt->rt_flags&RTCF_MULTICAST) {
258 if (sk_mc_loop(sk)
259#ifdef CONFIG_IP_MROUTE
260
261
262
263
264
265
266
267
268 &&
269 ((rt->rt_flags & RTCF_LOCAL) ||
270 !(IPCB(skb)->flags & IPSKB_FORWARDED))
271#endif
272 ) {
273 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
274 if (newskb)
275 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
276 newskb, NULL, newskb->dev,
277 ip_dev_loopback_xmit);
278 }
279
280
281
282 if (ip_hdr(skb)->ttl == 0) {
283 kfree_skb(skb);
284 return 0;
285 }
286 }
287
288 if (rt->rt_flags&RTCF_BROADCAST) {
289 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
290 if (newskb)
291 NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
292 NULL, newskb->dev, ip_dev_loopback_xmit);
293 }
294
295 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
296 skb->dev, ip_finish_output,
297 !(IPCB(skb)->flags & IPSKB_REROUTED));
298}
299
300int ip_output(struct sk_buff *skb)
301{
302 struct net_device *dev = skb_dst(skb)->dev;
303
304 IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
305
306 skb->dev = dev;
307 skb->protocol = htons(ETH_P_IP);
308
309 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
310 ip_finish_output,
311 !(IPCB(skb)->flags & IPSKB_REROUTED));
312}
313
314int ip_queue_xmit(struct sk_buff *skb)
315{
316 struct sock *sk = skb->sk;
317 struct inet_sock *inet = inet_sk(sk);
318 struct ip_options *opt = inet->opt;
319 struct rtable *rt;
320 struct iphdr *iph;
321 int res;
322
323
324
325
326 rcu_read_lock();
327 rt = skb_rtable(skb);
328 if (rt != NULL)
329 goto packet_routed;
330
331
332 rt = (struct rtable *)__sk_dst_check(sk, 0);
333 if (rt == NULL) {
334 __be32 daddr;
335
336
337 daddr = inet->inet_daddr;
338 if(opt && opt->srr)
339 daddr = opt->faddr;
340
341 {
342 struct flowi fl = { .oif = sk->sk_bound_dev_if,
343 .mark = sk->sk_mark,
344 .nl_u = { .ip4_u =
345 { .daddr = daddr,
346 .saddr = inet->inet_saddr,
347 .tos = RT_CONN_FLAGS(sk) } },
348 .proto = sk->sk_protocol,
349 .flags = inet_sk_flowi_flags(sk),
350 .uli_u = { .ports =
351 { .sport = inet->inet_sport,
352 .dport = inet->inet_dport } } };
353
354
355
356
357
358 security_sk_classify_flow(sk, &fl);
359 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
360 goto no_route;
361 }
362 sk_setup_caps(sk, &rt->u.dst);
363 }
364 skb_dst_set_noref(skb, &rt->u.dst);
365
366packet_routed:
367 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
368 goto no_route;
369
370
371 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
372 skb_reset_network_header(skb);
373 iph = ip_hdr(skb);
374 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
375 if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
376 iph->frag_off = htons(IP_DF);
377 else
378 iph->frag_off = 0;
379 iph->ttl = ip_select_ttl(inet, &rt->u.dst);
380 iph->protocol = sk->sk_protocol;
381 iph->saddr = rt->rt_src;
382 iph->daddr = rt->rt_dst;
383
384
385 if (opt && opt->optlen) {
386 iph->ihl += opt->optlen >> 2;
387 ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
388 }
389
390 ip_select_ident_more(iph, &rt->u.dst, sk,
391 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
392
393 skb->priority = sk->sk_priority;
394 skb->mark = sk->sk_mark;
395
396 res = ip_local_out(skb);
397 rcu_read_unlock();
398 return res;
399
400no_route:
401 rcu_read_unlock();
402 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
403 kfree_skb(skb);
404 return -EHOSTUNREACH;
405}
406
407
408static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
409{
410 to->pkt_type = from->pkt_type;
411 to->priority = from->priority;
412 to->protocol = from->protocol;
413 skb_dst_drop(to);
414 skb_dst_set(to, dst_clone(skb_dst(from)));
415 to->dev = from->dev;
416 to->mark = from->mark;
417
418
419 IPCB(to)->flags = IPCB(from)->flags;
420
421#ifdef CONFIG_NET_SCHED
422 to->tc_index = from->tc_index;
423#endif
424 nf_copy(to, from);
425#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
426 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
427 to->nf_trace = from->nf_trace;
428#endif
429#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
430 to->ipvs_property = from->ipvs_property;
431#endif
432 skb_copy_secmark(to, from);
433}
434
435
436
437
438
439
440
441
442int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
443{
444 struct iphdr *iph;
445 int raw = 0;
446 int ptr;
447 struct net_device *dev;
448 struct sk_buff *skb2;
449 unsigned int mtu, hlen, left, len, ll_rs, pad;
450 int offset;
451 __be16 not_last_frag;
452 struct rtable *rt = skb_rtable(skb);
453 int err = 0;
454
455 dev = rt->u.dst.dev;
456
457
458
459
460
461 iph = ip_hdr(skb);
462
463 if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
464 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
465 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
466 htonl(ip_skb_dst_mtu(skb)));
467 kfree_skb(skb);
468 return -EMSGSIZE;
469 }
470
471
472
473
474
475 hlen = iph->ihl * 4;
476 mtu = dst_mtu(&rt->u.dst) - hlen;
477#ifdef CONFIG_BRIDGE_NETFILTER
478 if (skb->nf_bridge)
479 mtu -= nf_bridge_mtu_reduction(skb);
480#endif
481 IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
482
483
484
485
486
487
488
489
490 if (skb_has_frags(skb)) {
491 struct sk_buff *frag;
492 int first_len = skb_pagelen(skb);
493 int truesizes = 0;
494
495 if (first_len - hlen > mtu ||
496 ((first_len - hlen) & 7) ||
497 (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
498 skb_cloned(skb))
499 goto slow_path;
500
501 skb_walk_frags(skb, frag) {
502
503 if (frag->len > mtu ||
504 ((frag->len & 7) && frag->next) ||
505 skb_headroom(frag) < hlen)
506 goto slow_path;
507
508
509 if (skb_shared(frag))
510 goto slow_path;
511
512 BUG_ON(frag->sk);
513 if (skb->sk) {
514 frag->sk = skb->sk;
515 frag->destructor = sock_wfree;
516 }
517 truesizes += frag->truesize;
518 }
519
520
521
522 err = 0;
523 offset = 0;
524 frag = skb_shinfo(skb)->frag_list;
525 skb_frag_list_init(skb);
526 skb->data_len = first_len - skb_headlen(skb);
527 skb->truesize -= truesizes;
528 skb->len = first_len;
529 iph->tot_len = htons(first_len);
530 iph->frag_off = htons(IP_MF);
531 ip_send_check(iph);
532
533 for (;;) {
534
535
536 if (frag) {
537 frag->ip_summed = CHECKSUM_NONE;
538 skb_reset_transport_header(frag);
539 __skb_push(frag, hlen);
540 skb_reset_network_header(frag);
541 memcpy(skb_network_header(frag), iph, hlen);
542 iph = ip_hdr(frag);
543 iph->tot_len = htons(frag->len);
544 ip_copy_metadata(frag, skb);
545 if (offset == 0)
546 ip_options_fragment(frag);
547 offset += skb->len - hlen;
548 iph->frag_off = htons(offset>>3);
549 if (frag->next != NULL)
550 iph->frag_off |= htons(IP_MF);
551
552 ip_send_check(iph);
553 }
554
555 err = output(skb);
556
557 if (!err)
558 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
559 if (err || !frag)
560 break;
561
562 skb = frag;
563 frag = skb->next;
564 skb->next = NULL;
565 }
566
567 if (err == 0) {
568 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
569 return 0;
570 }
571
572 while (frag) {
573 skb = frag->next;
574 kfree_skb(frag);
575 frag = skb;
576 }
577 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
578 return err;
579 }
580
581slow_path:
582 left = skb->len - hlen;
583 ptr = raw + hlen;
584
585
586
587
588 pad = nf_bridge_pad(skb);
589 ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
590 mtu -= pad;
591
592
593
594
595
596 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
597 not_last_frag = iph->frag_off & htons(IP_MF);
598
599
600
601
602
603 while (left > 0) {
604 len = left;
605
606 if (len > mtu)
607 len = mtu;
608
609
610 if (len < left) {
611 len &= ~7;
612 }
613
614
615
616
617 if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
618 NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
619 err = -ENOMEM;
620 goto fail;
621 }
622
623
624
625
626
627 ip_copy_metadata(skb2, skb);
628 skb_reserve(skb2, ll_rs);
629 skb_put(skb2, len + hlen);
630 skb_reset_network_header(skb2);
631 skb2->transport_header = skb2->network_header + hlen;
632
633
634
635
636
637
638 if (skb->sk)
639 skb_set_owner_w(skb2, skb->sk);
640
641
642
643
644
645 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
646
647
648
649
650 if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
651 BUG();
652 left -= len;
653
654
655
656
657 iph = ip_hdr(skb2);
658 iph->frag_off = htons((offset >> 3));
659
660
661
662
663
664
665
666 if (offset == 0)
667 ip_options_fragment(skb);
668
669
670
671
672
673 if (left > 0 || not_last_frag)
674 iph->frag_off |= htons(IP_MF);
675 ptr += len;
676 offset += len;
677
678
679
680
681 iph->tot_len = htons(len + hlen);
682
683 ip_send_check(iph);
684
685 err = output(skb2);
686 if (err)
687 goto fail;
688
689 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
690 }
691 kfree_skb(skb);
692 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
693 return err;
694
695fail:
696 kfree_skb(skb);
697 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
698 return err;
699}
700
701EXPORT_SYMBOL(ip_fragment);
702
703int
704ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
705{
706 struct iovec *iov = from;
707
708 if (skb->ip_summed == CHECKSUM_PARTIAL) {
709 if (memcpy_fromiovecend(to, iov, offset, len) < 0)
710 return -EFAULT;
711 } else {
712 __wsum csum = 0;
713 if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
714 return -EFAULT;
715 skb->csum = csum_block_add(skb->csum, csum, odd);
716 }
717 return 0;
718}
719
720static inline __wsum
721csum_page(struct page *page, int offset, int copy)
722{
723 char *kaddr;
724 __wsum csum;
725 kaddr = kmap(page);
726 csum = csum_partial(kaddr + offset, copy, 0);
727 kunmap(page);
728 return csum;
729}
730
731static inline int ip_ufo_append_data(struct sock *sk,
732 int getfrag(void *from, char *to, int offset, int len,
733 int odd, struct sk_buff *skb),
734 void *from, int length, int hh_len, int fragheaderlen,
735 int transhdrlen, int mtu, unsigned int flags)
736{
737 struct sk_buff *skb;
738 int err;
739
740
741
742
743
744 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
745 skb = sock_alloc_send_skb(sk,
746 hh_len + fragheaderlen + transhdrlen + 20,
747 (flags & MSG_DONTWAIT), &err);
748
749 if (skb == NULL)
750 return err;
751
752
753 skb_reserve(skb, hh_len);
754
755
756 skb_put(skb, fragheaderlen + transhdrlen);
757
758
759 skb_reset_network_header(skb);
760
761
762 skb->transport_header = skb->network_header + fragheaderlen;
763
764 skb->ip_summed = CHECKSUM_PARTIAL;
765 skb->csum = 0;
766 sk->sk_sndmsg_off = 0;
767
768
769 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
770 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
771 __skb_queue_tail(&sk->sk_write_queue, skb);
772 }
773
774 return skb_append_datato_frags(sk, skb, getfrag, from,
775 (length - transhdrlen));
776}
777
778
779
780
781
782
783
784
785
786
787
788
/*
 * Append data to the socket's pending write queue, building skbs sized
 * so that each one can later become an IP fragment.
 *
 * @sk:          socket whose sk_write_queue and cork state are used
 * @getfrag:     callback that copies payload into the skb; a negative
 *               return is reported as -EFAULT
 * @from:        opaque source handed to @getfrag
 * @length:      number of bytes to append
 * @transhdrlen: transport header length; only honoured when the queue
 *               is empty (reset to 0 otherwise)
 * @ipc:         cookie providing options, address and tx flags
 * @rtp:         route to use; on the first call the reference is taken
 *               over and *rtp is set to NULL
 * @flags:       MSG_* flags (MSG_PROBE, MSG_MORE and MSG_DONTWAIT are
 *               examined here)
 *
 * Returns 0 on success or a negative errno. On the error path the
 * not-yet-appended length is subtracted from cork.length again and
 * IPSTATS_MIB_OUTDISCARDS is incremented.
 */
int ip_append_data(struct sock *sk,
		   int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
		   void *from, int length, int transhdrlen,
		   struct ipcm_cookie *ipc, struct rtable **rtp,
		   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

	struct ip_options *opt = NULL;
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	unsigned int maxfraglen, fragheaderlen;
	int csummode = CHECKSUM_NONE;
	struct rtable *rt;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * First call for this datagram: set up the cork state.
		 */
		opt = ipc->opt;
		if (opt) {
			/* Keep a private copy of the options in the cork. */
			if (inet->cork.opt == NULL) {
				inet->cork.opt = kmalloc(sizeof(struct ip_options) + 40, sk->sk_allocation);
				if (unlikely(inet->cork.opt == NULL))
					return -ENOBUFS;
			}
			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
			inet->cork.flags |= IPCORK_OPT;
			inet->cork.addr = ipc->addr;
		}
		rt = *rtp;
		if (unlikely(!rt))
			return -EFAULT;
		/*
		 * The route reference now belongs to the cork; the caller
		 * sees NULL and must not release it.
		 */
		*rtp = NULL;
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->u.dst.dev->mtu :
					    dst_mtu(rt->u.dst.path);
		inet->cork.dst = &rt->u.dst;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Account for dst header_len as extra leading header space. */
		if ((exthdrlen = rt->u.dst.header_len) != 0) {
			length += exthdrlen;
			transhdrlen += exthdrlen;
		}
	} else {
		/* Continuing a corked datagram: reuse the saved state. */
		rt = (struct rtable *)inet->cork.dst;
		if (inet->cork.flags & IPCORK_OPT)
			opt = inet->cork.opt;

		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}
	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* maxfraglen: header plus the largest payload that is a multiple of 8. */
	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	/* The whole datagram must fit in the 16-bit total length. */
	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
			       mtu-exthdrlen);
		return -EMSGSIZE;
	}

	/*
	 * Use CHECKSUM_PARTIAL only for a first chunk (transhdrlen != 0)
	 * that fits in one MTU, on a device advertising NETIF_F_V4_CSUM,
	 * and with no extra header length.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
	    !exthdrlen)
		csummode = CHECKSUM_PARTIAL;

	skb = skb_peek_tail(&sk->sk_write_queue);

	inet->cork.length += length;
	/* UDP on a NETIF_F_UFO device, oversized or already GSO: UFO path. */
	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
					 fragheaderlen, transhdrlen, mtu,
					 flags);
		if (err)
			goto error;
		return 0;
	}

	/*
	 * Nothing queued yet: jump straight to the allocation code inside
	 * the loop below.
	 */
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/*
		 * Room left in the tail skb: up to mtu if the rest fits,
		 * otherwise only up to maxfraglen.
		 */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			/*
			 * fraggap: bytes by which the previous skb exceeds
			 * maxfraglen; they are moved into the new skb below.
			 */
			skb_prev = skb;
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * Payload for the new skb: everything that is left
			 * if it fits, otherwise a full maxfraglen fragment.
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

			/*
			 * With MSG_MORE on a device without NETIF_F_SG,
			 * allocate a full MTU so later calls can append
			 * into the linear area.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * Only the skb that takes all remaining data gets
			 * room for the dst trailer_len.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/*
				 * Later skbs are allocated with sock_wmalloc()
				 * and only while sk_wmem_alloc is within twice
				 * sk_sndbuf.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else
					/*
					 * Clear the tx flags in the cookie so
					 * the skb_tx() copy below carries them
					 * only on skbs allocated with
					 * transhdrlen != 0.
					 */
					ipc->shtx.flags = 0;
			}
			if (skb == NULL)
				goto error;

			/*
			 * Fill in the control structures.
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
			*skb_tx(skb) = ipc->shtx;

			/*
			 * Reserve the fragment and place the network and
			 * transport header offsets.
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			data += fragheaderlen;

			if (fraggap) {
				/*
				 * Move the overhang from the previous skb,
				 * shifting its checksum contribution along,
				 * then trim the previous skb to maxfraglen.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			/* Header lengths and csummode apply to the first skb only. */
			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			unsigned int off;

			/* No scatter-gather: append to the linear area. */
			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
					offset, copy, off, skb) < 0) {
				/* Undo the skb_put() on failure. */
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather: append into page fragments. */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				/* Room left in the socket's current page. */
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					/* Page not yet in this skb: add a frag slot. */
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				/* Start a fresh page. */
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL)  {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			/* Account the appended bytes in the frag, skb and socket. */
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error:
	/* 'length' is what was not appended; take it back out of the cork. */
	inet->cork.length -= length;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1081
/*
 * Append @size bytes of @page, starting at @offset, to the socket's
 * pending write queue by reference (the page is attached as a
 * fragment, not copied).
 *
 * The queue must already be non-empty, so the cork state is taken as
 * set up. Returns 0 on success, or:
 *   -EPERM       the socket has hdrincl set
 *   -EINVAL      the write queue is empty
 *   -EOPNOTSUPP  the output device lacks NETIF_F_SG
 *   -EMSGSIZE    the datagram would exceed 0xFFFF bytes, or the skb has
 *                no free fragment slot
 *   -ENOBUFS     a new skb could not be allocated
 * MSG_PROBE returns 0 without doing anything.
 */
ssize_t	ip_append_page(struct sock *sk, struct page *page,
		       int offset, size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	struct rtable *rt;
	struct ip_options *opt = NULL;
	int hh_len;
	int mtu;
	int len;
	int err;
	unsigned int maxfraglen, fragheaderlen, fraggap;

	if (inet->hdrincl)
		return -EPERM;

	if (flags&MSG_PROBE)
		return 0;

	if (skb_queue_empty(&sk->sk_write_queue))
		return -EINVAL;

	rt = (struct rtable *)inet->cork.dst;
	if (inet->cork.flags & IPCORK_OPT)
		opt = inet->cork.opt;

	if (!(rt->u.dst.dev->features&NETIF_F_SG))
		return -EOPNOTSUPP;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
	mtu = inet->cork.fragsize;

	/* maxfraglen: header plus the largest payload that is a multiple of 8. */
	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;

	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, mtu);
		return -EMSGSIZE;
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		return -EINVAL;

	inet->cork.length += size;
	/* UDP on a NETIF_F_UFO device, too big for one MTU: mark the skb GSO. */
	if ((size + skb->len > mtu) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	}


	while (size > 0) {
		int i;

		if (skb_is_gso(skb))
			len = size;
		else {

			/*
			 * Room left in the tail skb: up to mtu if the rest
			 * fits, otherwise only up to maxfraglen.
			 */
			len = mtu - skb->len;
			if (len < size)
				len = maxfraglen - skb->len;
		}
		if (len <= 0) {
			struct sk_buff *skb_prev;
			int alloclen;

			/*
			 * Tail skb is full: start a new one holding only
			 * the headers plus any overhang (fraggap) of the
			 * previous skb beyond maxfraglen.
			 */
			skb_prev = skb;
			fraggap = skb_prev->len - maxfraglen;

			alloclen = fragheaderlen + hh_len + fraggap + 15;
			skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
			if (unlikely(!skb)) {
				err = -ENOBUFS;
				goto error;
			}

			/*
			 * Fill in the control structures.
			 */
			skb->ip_summed = CHECKSUM_NONE;
			skb->csum = 0;
			skb_reserve(skb, hh_len);

			/*
			 * Reserve the header area and place the network and
			 * transport header offsets.
			 */
			skb_put(skb, fragheaderlen + fraggap);
			skb_reset_network_header(skb);
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/*
				 * Move the overhang from the previous skb,
				 * shifting its checksum contribution along,
				 * then trim the previous skb to maxfraglen.
				 */
				skb->csum = skb_copy_and_csum_bits(skb_prev,
								   maxfraglen,
						    skb_transport_header(skb),
								   fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				pskb_trim_unique(skb_prev, maxfraglen);
			}

			/*
			 * Put the packet on the pending queue.
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		i = skb_shinfo(skb)->nr_frags;
		if (len > size)
			len = size;
		/* Extend the last fragment if possible, else add a new one. */
		if (skb_can_coalesce(skb, i, page, offset)) {
			skb_shinfo(skb)->frags[i-1].size += len;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, len);
		} else {
			err = -EMSGSIZE;
			goto error;
		}

		/* Software checksum: fold the page data into skb->csum. */
		if (skb->ip_summed == CHECKSUM_NONE) {
			__wsum csum;
			csum = csum_page(page, offset, len);
			skb->csum = csum_block_add(skb->csum, csum, skb->len);
		}

		/* Account the appended bytes in the skb and the socket. */
		skb->len += len;
		skb->data_len += len;
		skb->truesize += len;
		atomic_add(len, &sk->sk_wmem_alloc);
		offset += len;
		size -= len;
	}
	return 0;

error:
	/* 'size' is what was not appended; take it back out of the cork. */
	inet->cork.length -= size;
	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1224
1225static void ip_cork_release(struct inet_sock *inet)
1226{
1227 inet->cork.flags &= ~IPCORK_OPT;
1228 kfree(inet->cork.opt);
1229 inet->cork.opt = NULL;
1230 dst_release(inet->cork.dst);
1231 inet->cork.dst = NULL;
1232}
1233
1234
1235
1236
1237
1238int ip_push_pending_frames(struct sock *sk)
1239{
1240 struct sk_buff *skb, *tmp_skb;
1241 struct sk_buff **tail_skb;
1242 struct inet_sock *inet = inet_sk(sk);
1243 struct net *net = sock_net(sk);
1244 struct ip_options *opt = NULL;
1245 struct rtable *rt = (struct rtable *)inet->cork.dst;
1246 struct iphdr *iph;
1247 __be16 df = 0;
1248 __u8 ttl;
1249 int err = 0;
1250
1251 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1252 goto out;
1253 tail_skb = &(skb_shinfo(skb)->frag_list);
1254
1255
1256 if (skb->data < skb_network_header(skb))
1257 __skb_pull(skb, skb_network_offset(skb));
1258 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1259 __skb_pull(tmp_skb, skb_network_header_len(skb));
1260 *tail_skb = tmp_skb;
1261 tail_skb = &(tmp_skb->next);
1262 skb->len += tmp_skb->len;
1263 skb->data_len += tmp_skb->len;
1264 skb->truesize += tmp_skb->truesize;
1265 tmp_skb->destructor = NULL;
1266 tmp_skb->sk = NULL;
1267 }
1268
1269
1270
1271
1272
1273 if (inet->pmtudisc < IP_PMTUDISC_DO)
1274 skb->local_df = 1;
1275
1276
1277
1278
1279 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1280 (skb->len <= dst_mtu(&rt->u.dst) &&
1281 ip_dont_fragment(sk, &rt->u.dst)))
1282 df = htons(IP_DF);
1283
1284 if (inet->cork.flags & IPCORK_OPT)
1285 opt = inet->cork.opt;
1286
1287 if (rt->rt_type == RTN_MULTICAST)
1288 ttl = inet->mc_ttl;
1289 else
1290 ttl = ip_select_ttl(inet, &rt->u.dst);
1291
1292 iph = (struct iphdr *)skb->data;
1293 iph->version = 4;
1294 iph->ihl = 5;
1295 if (opt) {
1296 iph->ihl += opt->optlen>>2;
1297 ip_options_build(skb, opt, inet->cork.addr, rt, 0);
1298 }
1299 iph->tos = inet->tos;
1300 iph->frag_off = df;
1301 ip_select_ident(iph, &rt->u.dst, sk);
1302 iph->ttl = ttl;
1303 iph->protocol = sk->sk_protocol;
1304 iph->saddr = rt->rt_src;
1305 iph->daddr = rt->rt_dst;
1306
1307 skb->priority = sk->sk_priority;
1308 skb->mark = sk->sk_mark;
1309
1310
1311
1312
1313 inet->cork.dst = NULL;
1314 skb_dst_set(skb, &rt->u.dst);
1315
1316 if (iph->protocol == IPPROTO_ICMP)
1317 icmp_out_count(net, ((struct icmphdr *)
1318 skb_transport_header(skb))->type);
1319
1320
1321 err = ip_local_out(skb);
1322 if (err) {
1323 if (err > 0)
1324 err = net_xmit_errno(err);
1325 if (err)
1326 goto error;
1327 }
1328
1329out:
1330 ip_cork_release(inet);
1331 return err;
1332
1333error:
1334 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
1335 goto out;
1336}
1337
1338
1339
1340
1341void ip_flush_pending_frames(struct sock *sk)
1342{
1343 struct sk_buff *skb;
1344
1345 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
1346 kfree_skb(skb);
1347
1348 ip_cork_release(inet_sk(sk));
1349}
1350
1351
1352
1353
1354
1355static int ip_reply_glue_bits(void *dptr, char *to, int offset,
1356 int len, int odd, struct sk_buff *skb)
1357{
1358 __wsum csum;
1359
1360 csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
1361 skb->csum = csum_block_add(skb->csum, csum, odd);
1362 return 0;
1363}
1364
1365
1366
1367
1368
1369
1370
1371
1372void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
1373 unsigned int len)
1374{
1375 struct inet_sock *inet = inet_sk(sk);
1376 struct {
1377 struct ip_options opt;
1378 char data[40];
1379 } replyopts;
1380 struct ipcm_cookie ipc;
1381 __be32 daddr;
1382 struct rtable *rt = skb_rtable(skb);
1383
1384 if (ip_options_echo(&replyopts.opt, skb))
1385 return;
1386
1387 daddr = ipc.addr = rt->rt_src;
1388 ipc.opt = NULL;
1389 ipc.shtx.flags = 0;
1390
1391 if (replyopts.opt.optlen) {
1392 ipc.opt = &replyopts.opt;
1393
1394 if (ipc.opt->srr)
1395 daddr = replyopts.opt.faddr;
1396 }
1397
1398 {
1399 struct flowi fl = { .oif = arg->bound_dev_if,
1400 .nl_u = { .ip4_u =
1401 { .daddr = daddr,
1402 .saddr = rt->rt_spec_dst,
1403 .tos = RT_TOS(ip_hdr(skb)->tos) } },
1404
1405 .uli_u = { .ports =
1406 { .sport = tcp_hdr(skb)->dest,
1407 .dport = tcp_hdr(skb)->source } },
1408 .proto = sk->sk_protocol,
1409 .flags = ip_reply_arg_flowi_flags(arg) };
1410 security_skb_classify_flow(skb, &fl);
1411 if (ip_route_output_key(sock_net(sk), &rt, &fl))
1412 return;
1413 }
1414
1415
1416
1417
1418
1419
1420
1421 bh_lock_sock(sk);
1422 inet->tos = ip_hdr(skb)->tos;
1423 sk->sk_priority = skb->priority;
1424 sk->sk_protocol = ip_hdr(skb)->protocol;
1425 sk->sk_bound_dev_if = arg->bound_dev_if;
1426 ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
1427 &ipc, &rt, MSG_DONTWAIT);
1428 if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
1429 if (arg->csumoffset >= 0)
1430 *((__sum16 *)skb_transport_header(skb) +
1431 arg->csumoffset) = csum_fold(csum_add(skb->csum,
1432 arg->csum));
1433 skb->ip_summed = CHECKSUM_NONE;
1434 ip_push_pending_frames(sk);
1435 }
1436
1437 bh_unlock_sock(sk);
1438
1439 ip_rt_put(rt);
1440}
1441
1442void __init ip_init(void)
1443{
1444 ip_rt_init();
1445 inet_initpeers();
1446
1447#if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)
1448 igmp_mc_proc_init();
1449#endif
1450}
1451
1452EXPORT_SYMBOL(ip_generic_getfrag);
1453EXPORT_SYMBOL(ip_queue_xmit);
1454EXPORT_SYMBOL(ip_send_check);
1455