1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42#include <asm/uaccess.h>
43#include <asm/system.h>
44#include <linux/types.h>
45#include <linux/kernel.h>
46#include <linux/sched.h>
47#include <linux/mm.h>
48#include <linux/string.h>
49#include <linux/errno.h>
50#include <linux/config.h>
51
52#include <linux/socket.h>
53#include <linux/sockios.h>
54#include <linux/in.h>
55#include <linux/inet.h>
56#include <linux/netdevice.h>
57#include <linux/etherdevice.h>
58#include <linux/proc_fs.h>
59#include <linux/stat.h>
60#include <linux/init.h>
61
62#include <net/snmp.h>
63#include <net/ip.h>
64#include <net/protocol.h>
65#include <net/route.h>
66#include <net/tcp.h>
67#include <net/udp.h>
68#include <linux/skbuff.h>
69#include <net/sock.h>
70#include <net/arp.h>
71#include <net/icmp.h>
72#include <net/raw.h>
73#include <net/checksum.h>
74#include <linux/igmp.h>
75#include <linux/ip_fw.h>
76#include <linux/firewall.h>
77#include <linux/mroute.h>
78#include <linux/netlink.h>
79
80
81
82
83
/*
 * NOTE(review): presumably the sysctl-controlled "dynamic address" switch;
 * it is only defined here and nothing in this part of the file reads it.
 * Confirm against the sysctl table before relying on this description.
 */
int sysctl_ip_dynaddr = 0;

/*
 * Running counter used to fill in the "id" field of outgoing IP headers;
 * the senders in this file post-increment it each time they stamp a header.
 */
int ip_id_count = 0;
88
89
90__inline__ void ip_send_check(struct iphdr *iph)
91{
92 iph->check = 0;
93 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
94}
95
96
97
98
99void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
100 u32 saddr, u32 daddr, struct ip_options *opt)
101{
102 struct rtable *rt = (struct rtable *)skb->dst;
103 struct iphdr *iph;
104 struct device *dev;
105
106
107 if (opt)
108 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
109 else
110 iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
111
112 iph->version = 4;
113 iph->ihl = 5;
114 iph->tos = sk->ip_tos;
115 iph->frag_off = 0;
116 if (ip_dont_fragment(sk, &rt->u.dst))
117 iph->frag_off |= htons(IP_DF);
118 iph->ttl = sk->ip_ttl;
119 iph->daddr = rt->rt_dst;
120 iph->saddr = rt->rt_src;
121 iph->protocol = sk->protocol;
122 iph->tot_len = htons(skb->len);
123 iph->id = htons(ip_id_count++);
124 skb->nh.iph = iph;
125
126 if (opt && opt->optlen) {
127 iph->ihl += opt->optlen>>2;
128 ip_options_build(skb, opt, daddr, rt, 0);
129 }
130
131 dev = rt->u.dst.dev;
132
133#ifdef CONFIG_FIREWALL
134
135 switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
136 case FW_REJECT:
137 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
138
139 case FW_BLOCK:
140 case FW_QUEUE:
141 kfree_skb(skb);
142 return;
143 }
144#endif
145
146 ip_send_check(iph);
147
148
149 skb->dst->output(skb);
150 return;
151}
152
/*
 *	Thin wrapper: passes skb to ip_finish_output() and returns its result.
 */
int __ip_finish_output(struct sk_buff *skb)
{
	int rc;

	rc = ip_finish_output(skb);
	return rc;
}
157
158int ip_mc_output(struct sk_buff *skb)
159{
160 struct sock *sk = skb->sk;
161 struct rtable *rt = (struct rtable*)skb->dst;
162 struct device *dev = rt->u.dst.dev;
163
164
165
166
167
168 ip_statistics.IpOutRequests++;
169#ifdef CONFIG_IP_ROUTE_NAT
170 if (rt->rt_flags & RTCF_NAT)
171 ip_do_nat(skb);
172#endif
173
174 skb->dev = dev;
175 skb->protocol = __constant_htons(ETH_P_IP);
176
177
178
179
180
181 if (rt->rt_flags&RTCF_MULTICAST && (!sk || sk->ip_mc_loop)) {
182#ifdef CONFIG_IP_MROUTE
183
184
185
186
187
188
189
190
191 if ((rt->rt_flags&RTCF_LOCAL) || !(IPCB(skb)->flags&IPSKB_FORWARDED))
192#endif
193 dev_loopback_xmit(skb);
194
195
196
197 if (skb->nh.iph->ttl == 0) {
198 kfree_skb(skb);
199 return 0;
200 }
201 }
202
203 if (rt->rt_flags&RTCF_BROADCAST)
204 dev_loopback_xmit(skb);
205
206 return ip_finish_output(skb);
207}
208
209int ip_output(struct sk_buff *skb)
210{
211#ifdef CONFIG_IP_ROUTE_NAT
212 struct rtable *rt = (struct rtable*)skb->dst;
213#endif
214
215 ip_statistics.IpOutRequests++;
216
217#ifdef CONFIG_IP_ROUTE_NAT
218 if (rt->rt_flags&RTCF_NAT)
219 ip_do_nat(skb);
220#endif
221
222 return ip_finish_output(skb);
223}
224
225
226
227
228
229
230
231
232
233
234void ip_queue_xmit(struct sk_buff *skb)
235{
236 struct sock *sk = skb->sk;
237 struct ip_options *opt = sk->opt;
238 struct rtable *rt;
239 struct device *dev;
240 struct iphdr *iph;
241 unsigned int tot_len;
242
243
244 rt = (struct rtable *) sk->dst_cache;
245 if(rt == NULL || rt->u.dst.obsolete) {
246 u32 daddr;
247
248 sk->dst_cache = NULL;
249 ip_rt_put(rt);
250
251
252 daddr = sk->daddr;
253 if(opt && opt->srr)
254 daddr = opt->faddr;
255
256
257
258
259
260 if(ip_route_output(&rt, daddr, sk->saddr,
261 RT_TOS(sk->ip_tos) | RTO_CONN | sk->localroute,
262 sk->bound_dev_if))
263 goto drop;
264 sk->dst_cache = &rt->u.dst;
265 }
266 if(opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
267 goto no_route;
268
269
270 skb->dst = dst_clone(sk->dst_cache);
271
272
273 iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
274 iph->version = 4;
275 iph->ihl = 5;
276 iph->tos = sk->ip_tos;
277 iph->frag_off = 0;
278 iph->ttl = sk->ip_ttl;
279 iph->daddr = rt->rt_dst;
280 iph->saddr = rt->rt_src;
281 iph->protocol = sk->protocol;
282 skb->nh.iph = iph;
283
284
285 if(opt && opt->optlen) {
286 iph->ihl += opt->optlen >> 2;
287 ip_options_build(skb, opt, sk->daddr, rt, 0);
288 }
289
290 tot_len = skb->len;
291 iph->tot_len = htons(tot_len);
292 iph->id = htons(ip_id_count++);
293
294 dev = rt->u.dst.dev;
295
296#ifdef CONFIG_FIREWALL
297
298 switch (call_out_firewall(PF_INET, dev, iph, NULL, &skb)) {
299 case FW_REJECT:
300 start_bh_atomic();
301 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
302 end_bh_atomic();
303
304 case FW_BLOCK:
305 case FW_QUEUE:
306 goto drop;
307 }
308#endif
309
310
311
312
313
314
315 if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
316 struct sk_buff *skb2;
317
318 skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
319 kfree_skb(skb);
320 if (skb2 == NULL)
321 return;
322 if (sk)
323 skb_set_owner_w(skb, sk);
324 skb = skb2;
325 iph = skb->nh.iph;
326 }
327
328
329
330
331 if (tot_len > rt->u.dst.pmtu)
332 goto fragment;
333
334 if (ip_dont_fragment(sk, &rt->u.dst))
335 iph->frag_off |= __constant_htons(IP_DF);
336
337
338 ip_send_check(iph);
339
340 skb->priority = sk->priority;
341 skb->dst->output(skb);
342 return;
343
344fragment:
345 if (ip_dont_fragment(sk, &rt->u.dst) &&
346 tot_len > (iph->ihl<<2) + sizeof(struct tcphdr)+16) {
347
348
349
350
351
352 iph->frag_off |= __constant_htons(IP_DF);
353 NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big to self\n"));
354
355
356 start_bh_atomic();
357 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
358 htonl(rt->u.dst.pmtu));
359 end_bh_atomic();
360 goto drop;
361 }
362 ip_fragment(skb, skb->dst->output);
363 return;
364
365no_route:
366 sk->dst_cache = NULL;
367 ip_rt_put(rt);
368 ip_statistics.IpOutNoRoutes++;
369
370drop:
371 kfree_skb(skb);
372}
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394int ip_build_xmit_slow(struct sock *sk,
395 int getfrag (const void *,
396 char *,
397 unsigned int,
398 unsigned int),
399 const void *frag,
400 unsigned length,
401 struct ipcm_cookie *ipc,
402 struct rtable *rt,
403 int flags)
404{
405 unsigned int fraglen, maxfraglen, fragheaderlen;
406 int err;
407 int offset, mf;
408 int mtu;
409 unsigned short id;
410
411 int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
412 int nfrags=0;
413 struct ip_options *opt = ipc->opt;
414 int df = 0;
415
416 mtu = rt->u.dst.pmtu;
417 if (ip_dont_fragment(sk, &rt->u.dst))
418 df = htons(IP_DF);
419
420 length -= sizeof(struct iphdr);
421
422 if (opt) {
423 fragheaderlen = sizeof(struct iphdr) + opt->optlen;
424 maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
425 } else {
426 fragheaderlen = sizeof(struct iphdr);
427
428
429
430
431
432
433 maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
434 }
435
436 if (length + fragheaderlen > 0xFFFF) {
437 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
438 return -EMSGSIZE;
439 }
440
441
442
443
444
445 offset = length - (length % (maxfraglen - fragheaderlen));
446
447
448
449
450
451 fraglen = length - offset + fragheaderlen;
452
453 if (length-offset==0) {
454 fraglen = maxfraglen;
455 offset -= maxfraglen-fragheaderlen;
456 }
457
458
459
460
461
462
463 mf = 0;
464
465
466
467
468
469 if (offset > 0 && df) {
470 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
471 return(-EMSGSIZE);
472 }
473
474
475
476
477
478 dev_lock_list();
479
480
481
482
483
484 id = htons(ip_id_count++);
485
486
487
488
489
490 do {
491 char *data;
492 struct sk_buff * skb;
493
494
495
496
497
498 skb = sock_alloc_send_skb(sk, fraglen+hh_len+15, 0, flags&MSG_DONTWAIT, &err);
499 if (skb == NULL)
500 goto error;
501
502
503
504
505
506 skb->priority = sk->priority;
507 skb->dst = dst_clone(&rt->u.dst);
508 skb_reserve(skb, hh_len);
509
510
511
512
513
514 data = skb_put(skb, fraglen);
515 skb->nh.iph = (struct iphdr *)data;
516
517
518
519
520
521 {
522 struct iphdr *iph = (struct iphdr *)data;
523
524 iph->version = 4;
525 iph->ihl = 5;
526 if (opt) {
527 iph->ihl += opt->optlen>>2;
528 ip_options_build(skb, opt,
529 ipc->addr, rt, offset);
530 }
531 iph->tos = sk->ip_tos;
532 iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
533 iph->id = id;
534 iph->frag_off = htons(offset>>3);
535 iph->frag_off |= mf|df;
536 if (rt->rt_type == RTN_MULTICAST)
537 iph->ttl = sk->ip_mc_ttl;
538 else
539 iph->ttl = sk->ip_ttl;
540 iph->protocol = sk->protocol;
541 iph->check = 0;
542 iph->saddr = rt->rt_src;
543 iph->daddr = rt->rt_dst;
544 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
545 data += iph->ihl*4;
546
547
548
549
550
551 mf = htons(IP_MF);
552 }
553
554
555
556
557
558 if (getfrag(frag, data, offset, fraglen-fragheaderlen)) {
559 err = -EFAULT;
560 kfree_skb(skb);
561 goto error;
562 }
563
564 offset -= (maxfraglen-fragheaderlen);
565 fraglen = maxfraglen;
566
567 nfrags++;
568
569#ifdef CONFIG_FIREWALL
570 switch (call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb)) {
571 case FW_QUEUE:
572 kfree_skb(skb);
573 continue;
574 case FW_BLOCK:
575 case FW_REJECT:
576 kfree_skb(skb);
577 err = -EPERM;
578 goto error;
579 }
580#endif
581
582 err = -ENETDOWN;
583 if (rt->u.dst.output(skb))
584 goto error;
585 } while (offset >= 0);
586
587 if (nfrags>1)
588 ip_statistics.IpFragCreates += nfrags;
589 dev_unlock_list();
590 return 0;
591
592error:
593 ip_statistics.IpOutDiscards++;
594 if (nfrags>1)
595 ip_statistics.IpFragCreates += nfrags;
596 dev_unlock_list();
597 return err;
598}
599
600
601
602
603
604int ip_build_xmit(struct sock *sk,
605 int getfrag (const void *,
606 char *,
607 unsigned int,
608 unsigned int),
609 const void *frag,
610 unsigned length,
611 struct ipcm_cookie *ipc,
612 struct rtable *rt,
613 int flags)
614{
615 int err;
616 struct sk_buff *skb;
617 int df;
618 struct iphdr *iph;
619
620
621
622
623
624
625 if (!sk->ip_hdrincl) {
626 length += sizeof(struct iphdr);
627
628
629
630
631 if (length > rt->u.dst.pmtu || ipc->opt != NULL)
632 return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
633 } else {
634 if (length > rt->u.dst.dev->mtu) {
635 ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
636 return -EMSGSIZE;
637 }
638 }
639
640
641
642
643 df = 0;
644 if (ip_dont_fragment(sk, &rt->u.dst))
645 df = htons(IP_DF);
646
647
648
649
650 {
651 int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
652
653 skb = sock_alloc_send_skb(sk, length+hh_len+15,
654 0, flags&MSG_DONTWAIT, &err);
655 if(skb==NULL)
656 goto error;
657 skb_reserve(skb, hh_len);
658 }
659
660 skb->priority = sk->priority;
661 skb->dst = dst_clone(&rt->u.dst);
662
663 skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
664
665 dev_lock_list();
666
667 if(!sk->ip_hdrincl) {
668 iph->version=4;
669 iph->ihl=5;
670 iph->tos=sk->ip_tos;
671 iph->tot_len = htons(length);
672 iph->id=htons(ip_id_count++);
673 iph->frag_off = df;
674 iph->ttl=sk->ip_mc_ttl;
675 if (rt->rt_type != RTN_MULTICAST)
676 iph->ttl=sk->ip_ttl;
677 iph->protocol=sk->protocol;
678 iph->saddr=rt->rt_src;
679 iph->daddr=rt->rt_dst;
680 iph->check=0;
681 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
682 err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
683 }
684 else
685 err = getfrag(frag, (void *)iph, 0, length);
686
687 dev_unlock_list();
688
689 if (err)
690 goto error_fault;
691
692#ifdef CONFIG_FIREWALL
693 switch (call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb)) {
694 case FW_QUEUE:
695 kfree_skb(skb);
696 return 0;
697 case FW_BLOCK:
698 case FW_REJECT:
699 kfree_skb(skb);
700 err = -EPERM;
701 goto error;
702 }
703#endif
704
705 return rt->u.dst.output(skb);
706
707error_fault:
708 err = -EFAULT;
709 kfree_skb(skb);
710error:
711 ip_statistics.IpOutDiscards++;
712 return err;
713}
714
715
716
717
718
719
720
721
722
723
724
725
726void ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
727{
728 struct iphdr *iph;
729 unsigned char *raw;
730 unsigned char *ptr;
731 struct device *dev;
732 struct sk_buff *skb2;
733 unsigned int mtu, hlen, left, len;
734 int offset;
735 int not_last_frag;
736 struct rtable *rt = (struct rtable*)skb->dst;
737
738 dev = rt->u.dst.dev;
739
740
741
742
743
744 raw = skb->nh.raw;
745 iph = (struct iphdr*)raw;
746
747
748
749
750
751 hlen = iph->ihl * 4;
752 left = ntohs(iph->tot_len) - hlen;
753 mtu = rt->u.dst.pmtu - hlen;
754 ptr = raw + hlen;
755
756
757
758
759
760
761
762
763
764#ifdef CONFIG_NET_PARANOIA
765 if (mtu<8)
766 goto fail;
767#endif
768
769
770
771
772
773 offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
774 not_last_frag = iph->frag_off & htons(IP_MF);
775
776
777
778
779
780 while(left > 0) {
781 len = left;
782
783 if (len > mtu)
784 len = mtu;
785
786
787 if (len < left) {
788 len &= ~7;
789 }
790
791
792
793
794 if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
795 NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
796 goto fail;
797 }
798
799
800
801
802
803 skb2->pkt_type = skb->pkt_type;
804 skb2->priority = skb->priority;
805 skb_reserve(skb2, (dev->hard_header_len+15)&~15);
806 skb_put(skb2, len + hlen);
807 skb2->nh.raw = skb2->data;
808 skb2->h.raw = skb2->data + hlen;
809
810
811
812
813
814
815 if (skb->sk)
816 skb_set_owner_w(skb2, skb->sk);
817 skb2->dst = dst_clone(skb->dst);
818
819
820
821
822
823 memcpy(skb2->nh.raw, raw, hlen);
824
825
826
827
828 memcpy(skb2->h.raw, ptr, len);
829 left -= len;
830
831
832
833
834 iph = skb2->nh.iph;
835 iph->frag_off = htons((offset >> 3));
836
837
838
839
840
841
842
843 if (offset == 0)
844 ip_options_fragment(skb);
845
846
847
848
849
850 if (left > 0 || not_last_frag)
851 iph->frag_off |= htons(IP_MF);
852 ptr += len;
853 offset += len;
854
855
856
857
858
859 ip_statistics.IpFragCreates++;
860
861 iph->tot_len = htons(len + hlen);
862
863 ip_send_check(iph);
864
865 output(skb2);
866 }
867 kfree_skb(skb);
868 ip_statistics.IpFragOKs++;
869 return;
870
871fail:
872 kfree_skb(skb);
873 ip_statistics.IpFragFails++;
874}
875
876
877
878
879static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
880 unsigned int fraglen)
881{
882 struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
883 u16 *pktp = (u16 *)to;
884 struct iovec *iov;
885 int len;
886 int hdrflag = 1;
887
888 iov = &dp->iov[0];
889 if (offset >= iov->iov_len) {
890 offset -= iov->iov_len;
891 iov++;
892 hdrflag = 0;
893 }
894 len = iov->iov_len - offset;
895 if (fraglen > len) {
896 dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
897 dp->csum);
898 offset = 0;
899 fraglen -= len;
900 to += len;
901 iov++;
902 }
903
904 dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
905 dp->csum);
906
907 if (hdrflag && dp->csumoffset)
908 *(pktp + dp->csumoffset) = csum_fold(dp->csum);
909 return 0;
910}
911
912
913
914
915
916
917
918
919void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
920 unsigned int len)
921{
922 struct {
923 struct ip_options opt;
924 char data[40];
925 } replyopts;
926 struct ipcm_cookie ipc;
927 u32 daddr;
928 struct rtable *rt = (struct rtable*)skb->dst;
929
930 if (ip_options_echo(&replyopts.opt, skb))
931 return;
932
933 sk->ip_tos = skb->nh.iph->tos;
934 sk->priority = skb->priority;
935 sk->protocol = skb->nh.iph->protocol;
936
937 daddr = ipc.addr = rt->rt_src;
938 ipc.opt = &replyopts.opt;
939
940 if (ipc.opt->srr)
941 daddr = replyopts.opt.faddr;
942 if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
943 return;
944
945
946 ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
947 ip_rt_put(rt);
948}
949
950
951
952
953
954static struct packet_type ip_packet_type =
955{
956 __constant_htons(ETH_P_IP),
957 NULL,
958 ip_rcv,
959 NULL,
960 NULL,
961};
962
963
964
#if defined(CONFIG_PROC_FS) && defined(CONFIG_IP_MULTICAST)
/*
 *	proc entry named "igmp", registered by ip_init() and backed by
 *	ip_mc_procinfo.  Only built when both procfs and IP multicast
 *	support are configured.
 *	NOTE(review): positional initialiser; the 4 presumably is the length
 *	of the name - confirm against struct proc_dir_entry.
 */
static struct proc_dir_entry proc_net_igmp = {
	PROC_NET_IGMP, 4, "igmp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	ip_mc_procinfo
};
#endif
975
976
977
978
979
980__initfunc(void ip_init(void))
981{
982 dev_add_pack(&ip_packet_type);
983
984 ip_rt_init();
985
986#ifdef CONFIG_PROC_FS
987#ifdef CONFIG_IP_MULTICAST
988 proc_net_register(&proc_net_igmp);
989#endif
990#endif
991}
992
993