1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67#include <linux/module.h>
68#include <asm/uaccess.h>
69#include <asm/system.h>
70#include <linux/bitops.h>
71#include <linux/types.h>
72#include <linux/kernel.h>
73#include <linux/mm.h>
74#include <linux/bootmem.h>
75#include <linux/string.h>
76#include <linux/socket.h>
77#include <linux/sockios.h>
78#include <linux/errno.h>
79#include <linux/in.h>
80#include <linux/inet.h>
81#include <linux/netdevice.h>
82#include <linux/proc_fs.h>
83#include <linux/init.h>
84#include <linux/workqueue.h>
85#include <linux/skbuff.h>
86#include <linux/inetdevice.h>
87#include <linux/igmp.h>
88#include <linux/pkt_sched.h>
89#include <linux/mroute.h>
90#include <linux/netfilter_ipv4.h>
91#include <linux/random.h>
92#include <linux/jhash.h>
93#include <linux/rcupdate.h>
94#include <linux/times.h>
95#include <net/dst.h>
96#include <net/net_namespace.h>
97#include <net/protocol.h>
98#include <net/ip.h>
99#include <net/route.h>
100#include <net/inetpeer.h>
101#include <net/sock.h>
102#include <net/ip_fib.h>
103#include <net/arp.h>
104#include <net/tcp.h>
105#include <net/icmp.h>
106#include <net/xfrm.h>
107#include <net/netevent.h>
108#include <net/rtnetlink.h>
109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h>
111#endif
112
/* TOS bits of a flow key that matter for routing, plus the RTO_ONLINK flag. */
#define RT_FL_TOS(oldflp) \
	((u32)((oldflp)->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))

/* NOTE(review): no user of IP_MAX_MTU is visible in this part of the file. */
#define IP_MAX_MTU	0xFFF0

/* Default for ip_rt_gc_timeout and initial value of the GC "expire" strength. */
#define RT_GC_TIMEOUT	(300*HZ)
119
120static int ip_rt_max_size;
121static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
122static int ip_rt_gc_interval __read_mostly = 60 * HZ;
123static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
124static int ip_rt_redirect_number __read_mostly = 9;
125static int ip_rt_redirect_load __read_mostly = HZ / 50;
126static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
127static int ip_rt_error_cost __read_mostly = HZ;
128static int ip_rt_error_burst __read_mostly = 5 * HZ;
129static int ip_rt_gc_elasticity __read_mostly = 8;
130static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
131static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
132static int ip_rt_min_advmss __read_mostly = 256;
133static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
134
135static void rt_worker_func(struct work_struct *work);
136static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
137static struct timer_list rt_secret_timer;
138
139
140
141
142
143static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
144static void ipv4_dst_destroy(struct dst_entry *dst);
145static void ipv4_dst_ifdown(struct dst_entry *dst,
146 struct net_device *dev, int how);
147static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
148static void ipv4_link_failure(struct sk_buff *skb);
149static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
150static int rt_garbage_collect(struct dst_ops *ops);
151
152
153static struct dst_ops ipv4_dst_ops = {
154 .family = AF_INET,
155 .protocol = __constant_htons(ETH_P_IP),
156 .gc = rt_garbage_collect,
157 .check = ipv4_dst_check,
158 .destroy = ipv4_dst_destroy,
159 .ifdown = ipv4_dst_ifdown,
160 .negative_advice = ipv4_negative_advice,
161 .link_failure = ipv4_link_failure,
162 .update_pmtu = ip_rt_update_pmtu,
163 .local_out = __ip_local_out,
164 .entry_size = sizeof(struct rtable),
165 .entries = ATOMIC_INIT(0),
166};
167
168#define ECN_OR_COST(class) TC_PRIO_##class
169
170const __u8 ip_tos2prio[16] = {
171 TC_PRIO_BESTEFFORT,
172 ECN_OR_COST(FILLER),
173 TC_PRIO_BESTEFFORT,
174 ECN_OR_COST(BESTEFFORT),
175 TC_PRIO_BULK,
176 ECN_OR_COST(BULK),
177 TC_PRIO_BULK,
178 ECN_OR_COST(BULK),
179 TC_PRIO_INTERACTIVE,
180 ECN_OR_COST(INTERACTIVE),
181 TC_PRIO_INTERACTIVE,
182 ECN_OR_COST(INTERACTIVE),
183 TC_PRIO_INTERACTIVE_BULK,
184 ECN_OR_COST(INTERACTIVE_BULK),
185 TC_PRIO_INTERACTIVE_BULK,
186 ECN_OR_COST(INTERACTIVE_BULK)
187};
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
/*
 * One slot of the route cache hash table: the head of a singly linked
 * chain of rtable entries linked through u.dst.rt_next.
 */
struct rt_hash_bucket {
	struct rtable	*chain;
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
	defined(CONFIG_PROVE_LOCKING)
/*
 * Hash chains are protected by a fixed-size array of spinlocks rather
 * than one lock per bucket: bucket "slot" uses lock
 * slot & (RT_HASH_LOCK_SZ - 1).  The array size scales with NR_CPUS and
 * is capped at 256 when lockdep is enabled.
 */
#ifdef CONFIG_LOCKDEP
# define RT_HASH_LOCK_SZ	256
#else
# if NR_CPUS >= 32
#  define RT_HASH_LOCK_SZ	4096
# elif NR_CPUS >= 16
#  define RT_HASH_LOCK_SZ	2048
# elif NR_CPUS >= 8
#  define RT_HASH_LOCK_SZ	1024
# elif NR_CPUS >= 4
#  define RT_HASH_LOCK_SZ	512
# else
#  define RT_HASH_LOCK_SZ	256
# endif
#endif

static spinlock_t	*rt_hash_locks;
# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]

/* Allocate and initialise the lock array; panics if allocation fails. */
static __init void rt_hash_lock_init(void)
{
	int i = 0;

	rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ,
				GFP_KERNEL);
	if (!rt_hash_locks)
		panic("IP: failed to allocate rt_hash_locks\n");

	while (i < RT_HASH_LOCK_SZ) {
		spin_lock_init(&rt_hash_locks[i]);
		i++;
	}
}
#else
/* No lock array in this configuration: every bucket maps to a NULL lock. */
# define rt_hash_lock_addr(slot) NULL

static inline void rt_hash_lock_init(void)
{
}
#endif
252
253static struct rt_hash_bucket *rt_hash_table __read_mostly;
254static unsigned rt_hash_mask __read_mostly;
255static unsigned int rt_hash_log __read_mostly;
256static atomic_t rt_genid __read_mostly;
257
258static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
259#define RT_CACHE_STAT_INC(field) \
260 (__raw_get_cpu_var(rt_cache_stat).field++)
261
262static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
263{
264 return jhash_3words((__force u32)(__be32)(daddr),
265 (__force u32)(__be32)(saddr),
266 idx, atomic_read(&rt_genid))
267 & rt_hash_mask;
268}
269
270#ifdef CONFIG_PROC_FS
271struct rt_cache_iter_state {
272 struct seq_net_private p;
273 int bucket;
274 int genid;
275};
276
277static struct rtable *rt_cache_get_first(struct seq_file *seq)
278{
279 struct rt_cache_iter_state *st = seq->private;
280 struct rtable *r = NULL;
281
282 for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
283 rcu_read_lock_bh();
284 r = rcu_dereference(rt_hash_table[st->bucket].chain);
285 while (r) {
286 if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
287 r->rt_genid == st->genid)
288 return r;
289 r = rcu_dereference(r->u.dst.rt_next);
290 }
291 rcu_read_unlock_bh();
292 }
293 return r;
294}
295
296static struct rtable *__rt_cache_get_next(struct seq_file *seq,
297 struct rtable *r)
298{
299 struct rt_cache_iter_state *st = seq->private;
300 r = r->u.dst.rt_next;
301 while (!r) {
302 rcu_read_unlock_bh();
303 if (--st->bucket < 0)
304 break;
305 rcu_read_lock_bh();
306 r = rt_hash_table[st->bucket].chain;
307 }
308 return rcu_dereference(r);
309}
310
311static struct rtable *rt_cache_get_next(struct seq_file *seq,
312 struct rtable *r)
313{
314 struct rt_cache_iter_state *st = seq->private;
315 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
316 if (dev_net(r->u.dst.dev) != seq_file_net(seq))
317 continue;
318 if (r->rt_genid == st->genid)
319 break;
320 }
321 return r;
322}
323
324static struct rtable *rt_cache_get_idx(struct seq_file *seq, loff_t pos)
325{
326 struct rtable *r = rt_cache_get_first(seq);
327
328 if (r)
329 while (pos && (r = rt_cache_get_next(seq, r)))
330 --pos;
331 return pos ? NULL : r;
332}
333
334static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
335{
336 struct rt_cache_iter_state *st = seq->private;
337 if (*pos)
338 return rt_cache_get_idx(seq, *pos - 1);
339 st->genid = atomic_read(&rt_genid);
340 return SEQ_START_TOKEN;
341}
342
343static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
344{
345 struct rtable *r;
346
347 if (v == SEQ_START_TOKEN)
348 r = rt_cache_get_first(seq);
349 else
350 r = rt_cache_get_next(seq, v);
351 ++*pos;
352 return r;
353}
354
355static void rt_cache_seq_stop(struct seq_file *seq, void *v)
356{
357 if (v && v != SEQ_START_TOKEN)
358 rcu_read_unlock_bh();
359}
360
361static int rt_cache_seq_show(struct seq_file *seq, void *v)
362{
363 if (v == SEQ_START_TOKEN)
364 seq_printf(seq, "%-127s\n",
365 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
366 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
367 "HHUptod\tSpecDst");
368 else {
369 struct rtable *r = v;
370 int len;
371
372 seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
373 "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
374 r->u.dst.dev ? r->u.dst.dev->name : "*",
375 (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
376 r->rt_flags, atomic_read(&r->u.dst.__refcnt),
377 r->u.dst.__use, 0, (unsigned long)r->rt_src,
378 (dst_metric(&r->u.dst, RTAX_ADVMSS) ?
379 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
380 dst_metric(&r->u.dst, RTAX_WINDOW),
381 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
382 dst_metric(&r->u.dst, RTAX_RTTVAR)),
383 r->fl.fl4_tos,
384 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
385 r->u.dst.hh ? (r->u.dst.hh->hh_output ==
386 dev_queue_xmit) : 0,
387 r->rt_spec_dst, &len);
388
389 seq_printf(seq, "%*s\n", 127 - len, "");
390 }
391 return 0;
392}
393
394static const struct seq_operations rt_cache_seq_ops = {
395 .start = rt_cache_seq_start,
396 .next = rt_cache_seq_next,
397 .stop = rt_cache_seq_stop,
398 .show = rt_cache_seq_show,
399};
400
401static int rt_cache_seq_open(struct inode *inode, struct file *file)
402{
403 return seq_open_net(inode, file, &rt_cache_seq_ops,
404 sizeof(struct rt_cache_iter_state));
405}
406
407static const struct file_operations rt_cache_seq_fops = {
408 .owner = THIS_MODULE,
409 .open = rt_cache_seq_open,
410 .read = seq_read,
411 .llseek = seq_lseek,
412 .release = seq_release_net,
413};
414
415
416static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
417{
418 int cpu;
419
420 if (*pos == 0)
421 return SEQ_START_TOKEN;
422
423 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
424 if (!cpu_possible(cpu))
425 continue;
426 *pos = cpu+1;
427 return &per_cpu(rt_cache_stat, cpu);
428 }
429 return NULL;
430}
431
432static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
433{
434 int cpu;
435
436 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
437 if (!cpu_possible(cpu))
438 continue;
439 *pos = cpu+1;
440 return &per_cpu(rt_cache_stat, cpu);
441 }
442 return NULL;
443
444}
445
/* ->stop: the per-CPU walk takes no locks, so there is nothing to undo. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{
}
450
451static int rt_cpu_seq_show(struct seq_file *seq, void *v)
452{
453 struct rt_cache_stat *st = v;
454
455 if (v == SEQ_START_TOKEN) {
456 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
457 return 0;
458 }
459
460 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
461 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
462 atomic_read(&ipv4_dst_ops.entries),
463 st->in_hit,
464 st->in_slow_tot,
465 st->in_slow_mc,
466 st->in_no_route,
467 st->in_brd,
468 st->in_martian_dst,
469 st->in_martian_src,
470
471 st->out_hit,
472 st->out_slow_tot,
473 st->out_slow_mc,
474
475 st->gc_total,
476 st->gc_ignored,
477 st->gc_goal_miss,
478 st->gc_dst_overflow,
479 st->in_hlist_search,
480 st->out_hlist_search
481 );
482 return 0;
483}
484
485static const struct seq_operations rt_cpu_seq_ops = {
486 .start = rt_cpu_seq_start,
487 .next = rt_cpu_seq_next,
488 .stop = rt_cpu_seq_stop,
489 .show = rt_cpu_seq_show,
490};
491
492
493static int rt_cpu_seq_open(struct inode *inode, struct file *file)
494{
495 return seq_open(file, &rt_cpu_seq_ops);
496}
497
498static const struct file_operations rt_cpu_seq_fops = {
499 .owner = THIS_MODULE,
500 .open = rt_cpu_seq_open,
501 .read = seq_read,
502 .llseek = seq_lseek,
503 .release = seq_release,
504};
505
506#ifdef CONFIG_NET_CLS_ROUTE
507static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
508 int length, int *eof, void *data)
509{
510 unsigned int i;
511
512 if ((offset & 3) || (length & 3))
513 return -EIO;
514
515 if (offset >= sizeof(struct ip_rt_acct) * 256) {
516 *eof = 1;
517 return 0;
518 }
519
520 if (offset + length >= sizeof(struct ip_rt_acct) * 256) {
521 length = sizeof(struct ip_rt_acct) * 256 - offset;
522 *eof = 1;
523 }
524
525 offset /= sizeof(u32);
526
527 if (length > 0) {
528 u32 *dst = (u32 *) buffer;
529
530 *start = buffer;
531 memset(dst, 0, length);
532
533 for_each_possible_cpu(i) {
534 unsigned int j;
535 u32 *src;
536
537 src = ((u32 *) per_cpu_ptr(ip_rt_acct, i)) + offset;
538 for (j = 0; j < length/4; j++)
539 dst[j] += src[j];
540 }
541 }
542 return length;
543}
544#endif
545
546static int __net_init ip_rt_do_proc_init(struct net *net)
547{
548 struct proc_dir_entry *pde;
549
550 pde = proc_net_fops_create(net, "rt_cache", S_IRUGO,
551 &rt_cache_seq_fops);
552 if (!pde)
553 goto err1;
554
555 pde = proc_create("rt_cache", S_IRUGO,
556 net->proc_net_stat, &rt_cpu_seq_fops);
557 if (!pde)
558 goto err2;
559
560#ifdef CONFIG_NET_CLS_ROUTE
561 pde = create_proc_read_entry("rt_acct", 0, net->proc_net,
562 ip_rt_acct_read, NULL);
563 if (!pde)
564 goto err3;
565#endif
566 return 0;
567
568#ifdef CONFIG_NET_CLS_ROUTE
569err3:
570 remove_proc_entry("rt_cache", net->proc_net_stat);
571#endif
572err2:
573 remove_proc_entry("rt_cache", net->proc_net);
574err1:
575 return -ENOMEM;
576}
577
578static void __net_exit ip_rt_do_proc_exit(struct net *net)
579{
580 remove_proc_entry("rt_cache", net->proc_net_stat);
581 remove_proc_entry("rt_cache", net->proc_net);
582 remove_proc_entry("rt_acct", net->proc_net);
583}
584
585static struct pernet_operations ip_rt_proc_ops __net_initdata = {
586 .init = ip_rt_do_proc_init,
587 .exit = ip_rt_do_proc_exit,
588};
589
590static int __init ip_rt_proc_init(void)
591{
592 return register_pernet_subsys(&ip_rt_proc_ops);
593}
594
595#else
/* Without CONFIG_PROC_FS there is nothing to register; always succeeds. */
static inline int ip_rt_proc_init(void)
{
	const int ok = 0;

	return ok;
}
600#endif
601
602static inline void rt_free(struct rtable *rt)
603{
604 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
605}
606
607static inline void rt_drop(struct rtable *rt)
608{
609 ip_rt_put(rt);
610 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
611}
612
613static inline int rt_fast_clean(struct rtable *rth)
614{
615
616
617 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
618 rth->fl.iif && rth->u.dst.rt_next;
619}
620
621static inline int rt_valuable(struct rtable *rth)
622{
623 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
624 rth->u.dst.expires;
625}
626
627static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
628{
629 unsigned long age;
630 int ret = 0;
631
632 if (atomic_read(&rth->u.dst.__refcnt))
633 goto out;
634
635 ret = 1;
636 if (rth->u.dst.expires &&
637 time_after_eq(jiffies, rth->u.dst.expires))
638 goto out;
639
640 age = jiffies - rth->u.dst.lastuse;
641 ret = 0;
642 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
643 (age <= tmo2 && rt_valuable(rth)))
644 goto out;
645 ret = 1;
646out: return ret;
647}
648
649
650
651
652
653
654static inline u32 rt_score(struct rtable *rt)
655{
656 u32 score = jiffies - rt->u.dst.lastuse;
657
658 score = ~score & ~(3<<30);
659
660 if (rt_valuable(rt))
661 score |= (1<<31);
662
663 if (!rt->fl.iif ||
664 !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
665 score |= (1<<30);
666
667 return score;
668}
669
670static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
671{
672 return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
673 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
674 (fl1->mark ^ fl2->mark) |
675 (*(u16 *)&fl1->nl_u.ip4_u.tos ^
676 *(u16 *)&fl2->nl_u.ip4_u.tos) |
677 (fl1->oif ^ fl2->oif) |
678 (fl1->iif ^ fl2->iif)) == 0;
679}
680
681static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
682{
683 return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
684}
685
686
687
688
689
690
691static void rt_do_flush(int process_context)
692{
693 unsigned int i;
694 struct rtable *rth, *next;
695
696 for (i = 0; i <= rt_hash_mask; i++) {
697 if (process_context && need_resched())
698 cond_resched();
699 rth = rt_hash_table[i].chain;
700 if (!rth)
701 continue;
702
703 spin_lock_bh(rt_hash_lock_addr(i));
704 rth = rt_hash_table[i].chain;
705 rt_hash_table[i].chain = NULL;
706 spin_unlock_bh(rt_hash_lock_addr(i));
707
708 for (; rth; rth = next) {
709 next = rth->u.dst.rt_next;
710 rt_free(rth);
711 }
712 }
713}
714
715static void rt_check_expire(void)
716{
717 static unsigned int rover;
718 unsigned int i = rover, goal;
719 struct rtable *rth, **rthp;
720 u64 mult;
721
722 mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
723 if (ip_rt_gc_timeout > 1)
724 do_div(mult, ip_rt_gc_timeout);
725 goal = (unsigned int)mult;
726 if (goal > rt_hash_mask)
727 goal = rt_hash_mask + 1;
728 for (; goal > 0; goal--) {
729 unsigned long tmo = ip_rt_gc_timeout;
730
731 i = (i + 1) & rt_hash_mask;
732 rthp = &rt_hash_table[i].chain;
733
734 if (need_resched())
735 cond_resched();
736
737 if (*rthp == NULL)
738 continue;
739 spin_lock_bh(rt_hash_lock_addr(i));
740 while ((rth = *rthp) != NULL) {
741 if (rth->rt_genid != atomic_read(&rt_genid)) {
742 *rthp = rth->u.dst.rt_next;
743 rt_free(rth);
744 continue;
745 }
746 if (rth->u.dst.expires) {
747
748 if (time_before_eq(jiffies, rth->u.dst.expires)) {
749 tmo >>= 1;
750 rthp = &rth->u.dst.rt_next;
751 continue;
752 }
753 } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
754 tmo >>= 1;
755 rthp = &rth->u.dst.rt_next;
756 continue;
757 }
758
759
760 *rthp = rth->u.dst.rt_next;
761 rt_free(rth);
762 }
763 spin_unlock_bh(rt_hash_lock_addr(i));
764 }
765 rover = i;
766}
767
768
769
770
771
772static void rt_worker_func(struct work_struct *work)
773{
774 rt_check_expire();
775 schedule_delayed_work(&expires_work, ip_rt_gc_interval);
776}
777
778
779
780
781
782
783
784static void rt_cache_invalidate(void)
785{
786 unsigned char shuffle;
787
788 get_random_bytes(&shuffle, sizeof(shuffle));
789 atomic_add(shuffle + 1U, &rt_genid);
790}
791
792
793
794
795
/*
 * Invalidate the route cache.
 *
 * @delay: if negative, stale entries are only invalidated and left to
 *         be reclaimed later; otherwise all buckets are also emptied
 *         right away.
 */
void rt_cache_flush(int delay)
{
	rt_cache_invalidate();

	if (delay < 0)
		return;

	rt_do_flush(!in_softirq());
}
802
803
804
805
806static void rt_secret_rebuild(unsigned long dummy)
807{
808 rt_cache_invalidate();
809 mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
810}
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825static int rt_garbage_collect(struct dst_ops *ops)
826{
827 static unsigned long expire = RT_GC_TIMEOUT;
828 static unsigned long last_gc;
829 static int rover;
830 static int equilibrium;
831 struct rtable *rth, **rthp;
832 unsigned long now = jiffies;
833 int goal;
834
835
836
837
838
839
840 RT_CACHE_STAT_INC(gc_total);
841
842 if (now - last_gc < ip_rt_gc_min_interval &&
843 atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
844 RT_CACHE_STAT_INC(gc_ignored);
845 goto out;
846 }
847
848
849 goal = atomic_read(&ipv4_dst_ops.entries) -
850 (ip_rt_gc_elasticity << rt_hash_log);
851 if (goal <= 0) {
852 if (equilibrium < ipv4_dst_ops.gc_thresh)
853 equilibrium = ipv4_dst_ops.gc_thresh;
854 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
855 if (goal > 0) {
856 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
857 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
858 }
859 } else {
860
861
862
863 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
864 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
865 }
866
867 if (now - last_gc >= ip_rt_gc_min_interval)
868 last_gc = now;
869
870 if (goal <= 0) {
871 equilibrium += goal;
872 goto work_done;
873 }
874
875 do {
876 int i, k;
877
878 for (i = rt_hash_mask, k = rover; i >= 0; i--) {
879 unsigned long tmo = expire;
880
881 k = (k + 1) & rt_hash_mask;
882 rthp = &rt_hash_table[k].chain;
883 spin_lock_bh(rt_hash_lock_addr(k));
884 while ((rth = *rthp) != NULL) {
885 if (rth->rt_genid == atomic_read(&rt_genid) &&
886 !rt_may_expire(rth, tmo, expire)) {
887 tmo >>= 1;
888 rthp = &rth->u.dst.rt_next;
889 continue;
890 }
891 *rthp = rth->u.dst.rt_next;
892 rt_free(rth);
893 goal--;
894 }
895 spin_unlock_bh(rt_hash_lock_addr(k));
896 if (goal <= 0)
897 break;
898 }
899 rover = k;
900
901 if (goal <= 0)
902 goto work_done;
903
904
905
906
907
908
909
910
911
912
913 RT_CACHE_STAT_INC(gc_goal_miss);
914
915 if (expire == 0)
916 break;
917
918 expire >>= 1;
919#if RT_CACHE_DEBUG >= 2
920 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
921 atomic_read(&ipv4_dst_ops.entries), goal, i);
922#endif
923
924 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
925 goto out;
926 } while (!in_softirq() && time_before_eq(jiffies, now));
927
928 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
929 goto out;
930 if (net_ratelimit())
931 printk(KERN_WARNING "dst cache overflow\n");
932 RT_CACHE_STAT_INC(gc_dst_overflow);
933 return 1;
934
935work_done:
936 expire += ip_rt_gc_min_interval;
937 if (expire > ip_rt_gc_timeout ||
938 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
939 expire = ip_rt_gc_timeout;
940#if RT_CACHE_DEBUG >= 2
941 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
942 atomic_read(&ipv4_dst_ops.entries), goal, rover);
943#endif
944out: return 0;
945}
946
947static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
948{
949 struct rtable *rth, **rthp;
950 unsigned long now;
951 struct rtable *cand, **candp;
952 u32 min_score;
953 int chain_length;
954 int attempts = !in_softirq();
955
956restart:
957 chain_length = 0;
958 min_score = ~(u32)0;
959 cand = NULL;
960 candp = NULL;
961 now = jiffies;
962
963 rthp = &rt_hash_table[hash].chain;
964
965 spin_lock_bh(rt_hash_lock_addr(hash));
966 while ((rth = *rthp) != NULL) {
967 if (rth->rt_genid != atomic_read(&rt_genid)) {
968 *rthp = rth->u.dst.rt_next;
969 rt_free(rth);
970 continue;
971 }
972 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
973
974 *rthp = rth->u.dst.rt_next;
975
976
977
978
979
980 rcu_assign_pointer(rth->u.dst.rt_next,
981 rt_hash_table[hash].chain);
982
983
984
985
986 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
987
988 dst_use(&rth->u.dst, now);
989 spin_unlock_bh(rt_hash_lock_addr(hash));
990
991 rt_drop(rt);
992 *rp = rth;
993 return 0;
994 }
995
996 if (!atomic_read(&rth->u.dst.__refcnt)) {
997 u32 score = rt_score(rth);
998
999 if (score <= min_score) {
1000 cand = rth;
1001 candp = rthp;
1002 min_score = score;
1003 }
1004 }
1005
1006 chain_length++;
1007
1008 rthp = &rth->u.dst.rt_next;
1009 }
1010
1011 if (cand) {
1012
1013
1014
1015
1016
1017
1018 if (chain_length > ip_rt_gc_elasticity) {
1019 *candp = cand->u.dst.rt_next;
1020 rt_free(cand);
1021 }
1022 }
1023
1024
1025
1026
1027 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1028 int err = arp_bind_neighbour(&rt->u.dst);
1029 if (err) {
1030 spin_unlock_bh(rt_hash_lock_addr(hash));
1031
1032 if (err != -ENOBUFS) {
1033 rt_drop(rt);
1034 return err;
1035 }
1036
1037
1038
1039
1040
1041 if (attempts-- > 0) {
1042 int saved_elasticity = ip_rt_gc_elasticity;
1043 int saved_int = ip_rt_gc_min_interval;
1044 ip_rt_gc_elasticity = 1;
1045 ip_rt_gc_min_interval = 0;
1046 rt_garbage_collect(&ipv4_dst_ops);
1047 ip_rt_gc_min_interval = saved_int;
1048 ip_rt_gc_elasticity = saved_elasticity;
1049 goto restart;
1050 }
1051
1052 if (net_ratelimit())
1053 printk(KERN_WARNING "Neighbour table overflow.\n");
1054 rt_drop(rt);
1055 return -ENOBUFS;
1056 }
1057 }
1058
1059 rt->u.dst.rt_next = rt_hash_table[hash].chain;
1060#if RT_CACHE_DEBUG >= 2
1061 if (rt->u.dst.rt_next) {
1062 struct rtable *trt;
1063 printk(KERN_DEBUG "rt_cache @%02x: " NIPQUAD_FMT, hash,
1064 NIPQUAD(rt->rt_dst));
1065 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
1066 printk(" . " NIPQUAD_FMT, NIPQUAD(trt->rt_dst));
1067 printk("\n");
1068 }
1069#endif
1070 rt_hash_table[hash].chain = rt;
1071 spin_unlock_bh(rt_hash_lock_addr(hash));
1072 *rp = rt;
1073 return 0;
1074}
1075
1076void rt_bind_peer(struct rtable *rt, int create)
1077{
1078 static DEFINE_SPINLOCK(rt_peer_lock);
1079 struct inet_peer *peer;
1080
1081 peer = inet_getpeer(rt->rt_dst, create);
1082
1083 spin_lock_bh(&rt_peer_lock);
1084 if (rt->peer == NULL) {
1085 rt->peer = peer;
1086 peer = NULL;
1087 }
1088 spin_unlock_bh(&rt_peer_lock);
1089 if (peer)
1090 inet_putpeer(peer);
1091}
1092
1093
1094
1095
1096
1097
1098
1099
1100static void ip_select_fb_ident(struct iphdr *iph)
1101{
1102 static DEFINE_SPINLOCK(ip_fb_id_lock);
1103 static u32 ip_fallback_id;
1104 u32 salt;
1105
1106 spin_lock_bh(&ip_fb_id_lock);
1107 salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
1108 iph->id = htons(salt & 0xFFFF);
1109 ip_fallback_id = salt;
1110 spin_unlock_bh(&ip_fb_id_lock);
1111}
1112
1113void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1114{
1115 struct rtable *rt = (struct rtable *) dst;
1116
1117 if (rt) {
1118 if (rt->peer == NULL)
1119 rt_bind_peer(rt, 1);
1120
1121
1122
1123
1124 if (rt->peer) {
1125 iph->id = htons(inet_getid(rt->peer, more));
1126 return;
1127 }
1128 } else
1129 printk(KERN_DEBUG "rt_bind_peer(0) @%p\n",
1130 __builtin_return_address(0));
1131
1132 ip_select_fb_ident(iph);
1133}
1134
1135static void rt_del(unsigned hash, struct rtable *rt)
1136{
1137 struct rtable **rthp, *aux;
1138
1139 rthp = &rt_hash_table[hash].chain;
1140 spin_lock_bh(rt_hash_lock_addr(hash));
1141 ip_rt_put(rt);
1142 while ((aux = *rthp) != NULL) {
1143 if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
1144 *rthp = aux->u.dst.rt_next;
1145 rt_free(aux);
1146 continue;
1147 }
1148 rthp = &aux->u.dst.rt_next;
1149 }
1150 spin_unlock_bh(rt_hash_lock_addr(hash));
1151}
1152
1153void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1154 __be32 saddr, struct net_device *dev)
1155{
1156 int i, k;
1157 struct in_device *in_dev = in_dev_get(dev);
1158 struct rtable *rth, **rthp;
1159 __be32 skeys[2] = { saddr, 0 };
1160 int ikeys[2] = { dev->ifindex, 0 };
1161 struct netevent_redirect netevent;
1162 struct net *net;
1163
1164 if (!in_dev)
1165 return;
1166
1167 net = dev_net(dev);
1168 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev)
1169 || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw)
1170 || ipv4_is_zeronet(new_gw))
1171 goto reject_redirect;
1172
1173 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
1174 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
1175 goto reject_redirect;
1176 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
1177 goto reject_redirect;
1178 } else {
1179 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
1180 goto reject_redirect;
1181 }
1182
1183 for (i = 0; i < 2; i++) {
1184 for (k = 0; k < 2; k++) {
1185 unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
1186
1187 rthp=&rt_hash_table[hash].chain;
1188
1189 rcu_read_lock();
1190 while ((rth = rcu_dereference(*rthp)) != NULL) {
1191 struct rtable *rt;
1192
1193 if (rth->fl.fl4_dst != daddr ||
1194 rth->fl.fl4_src != skeys[i] ||
1195 rth->fl.oif != ikeys[k] ||
1196 rth->fl.iif != 0 ||
1197 rth->rt_genid != atomic_read(&rt_genid) ||
1198 !net_eq(dev_net(rth->u.dst.dev), net)) {
1199 rthp = &rth->u.dst.rt_next;
1200 continue;
1201 }
1202
1203 if (rth->rt_dst != daddr ||
1204 rth->rt_src != saddr ||
1205 rth->u.dst.error ||
1206 rth->rt_gateway != old_gw ||
1207 rth->u.dst.dev != dev)
1208 break;
1209
1210 dst_hold(&rth->u.dst);
1211 rcu_read_unlock();
1212
1213 rt = dst_alloc(&ipv4_dst_ops);
1214 if (rt == NULL) {
1215 ip_rt_put(rth);
1216 in_dev_put(in_dev);
1217 return;
1218 }
1219
1220
1221 *rt = *rth;
1222 INIT_RCU_HEAD(&rt->u.dst.rcu_head);
1223 rt->u.dst.__use = 1;
1224 atomic_set(&rt->u.dst.__refcnt, 1);
1225 rt->u.dst.child = NULL;
1226 if (rt->u.dst.dev)
1227 dev_hold(rt->u.dst.dev);
1228 if (rt->idev)
1229 in_dev_hold(rt->idev);
1230 rt->u.dst.obsolete = 0;
1231 rt->u.dst.lastuse = jiffies;
1232 rt->u.dst.path = &rt->u.dst;
1233 rt->u.dst.neighbour = NULL;
1234 rt->u.dst.hh = NULL;
1235 rt->u.dst.xfrm = NULL;
1236 rt->rt_genid = atomic_read(&rt_genid);
1237 rt->rt_flags |= RTCF_REDIRECTED;
1238
1239
1240 rt->rt_gateway = new_gw;
1241
1242
1243 dst_confirm(&rth->u.dst);
1244
1245 if (rt->peer)
1246 atomic_inc(&rt->peer->refcnt);
1247
1248 if (arp_bind_neighbour(&rt->u.dst) ||
1249 !(rt->u.dst.neighbour->nud_state &
1250 NUD_VALID)) {
1251 if (rt->u.dst.neighbour)
1252 neigh_event_send(rt->u.dst.neighbour, NULL);
1253 ip_rt_put(rth);
1254 rt_drop(rt);
1255 goto do_next;
1256 }
1257
1258 netevent.old = &rth->u.dst;
1259 netevent.new = &rt->u.dst;
1260 call_netevent_notifiers(NETEVENT_REDIRECT,
1261 &netevent);
1262
1263 rt_del(hash, rth);
1264 if (!rt_intern_hash(hash, rt, &rt))
1265 ip_rt_put(rt);
1266 goto do_next;
1267 }
1268 rcu_read_unlock();
1269 do_next:
1270 ;
1271 }
1272 }
1273 in_dev_put(in_dev);
1274 return;
1275
1276reject_redirect:
1277#ifdef CONFIG_IP_ROUTE_VERBOSE
1278 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
1279 printk(KERN_INFO "Redirect from " NIPQUAD_FMT " on %s about "
1280 NIPQUAD_FMT " ignored.\n"
1281 " Advised path = " NIPQUAD_FMT " -> " NIPQUAD_FMT "\n",
1282 NIPQUAD(old_gw), dev->name, NIPQUAD(new_gw),
1283 NIPQUAD(saddr), NIPQUAD(daddr));
1284#endif
1285 in_dev_put(in_dev);
1286}
1287
1288static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1289{
1290 struct rtable *rt = (struct rtable *)dst;
1291 struct dst_entry *ret = dst;
1292
1293 if (rt) {
1294 if (dst->obsolete) {
1295 ip_rt_put(rt);
1296 ret = NULL;
1297 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1298 rt->u.dst.expires) {
1299 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1300 rt->fl.oif);
1301#if RT_CACHE_DEBUG >= 1
1302 printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
1303 NIPQUAD_FMT "/%02x dropped\n",
1304 NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
1305#endif
1306 rt_del(hash, rt);
1307 ret = NULL;
1308 }
1309 }
1310 return ret;
1311}
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329void ip_rt_send_redirect(struct sk_buff *skb)
1330{
1331 struct rtable *rt = skb->rtable;
1332 struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
1333
1334 if (!in_dev)
1335 return;
1336
1337 if (!IN_DEV_TX_REDIRECTS(in_dev))
1338 goto out;
1339
1340
1341
1342
1343 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
1344 rt->u.dst.rate_tokens = 0;
1345
1346
1347
1348
1349 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
1350 rt->u.dst.rate_last = jiffies;
1351 goto out;
1352 }
1353
1354
1355
1356
1357 if (rt->u.dst.rate_tokens == 0 ||
1358 time_after(jiffies,
1359 (rt->u.dst.rate_last +
1360 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
1361 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1362 rt->u.dst.rate_last = jiffies;
1363 ++rt->u.dst.rate_tokens;
1364#ifdef CONFIG_IP_ROUTE_VERBOSE
1365 if (IN_DEV_LOG_MARTIANS(in_dev) &&
1366 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1367 net_ratelimit())
1368 printk(KERN_WARNING "host " NIPQUAD_FMT "/if%d ignores "
1369 "redirects for " NIPQUAD_FMT " to " NIPQUAD_FMT ".\n",
1370 NIPQUAD(rt->rt_src), rt->rt_iif,
1371 NIPQUAD(rt->rt_dst), NIPQUAD(rt->rt_gateway));
1372#endif
1373 }
1374out:
1375 in_dev_put(in_dev);
1376}
1377
1378static int ip_error(struct sk_buff *skb)
1379{
1380 struct rtable *rt = skb->rtable;
1381 unsigned long now;
1382 int code;
1383
1384 switch (rt->u.dst.error) {
1385 case EINVAL:
1386 default:
1387 goto out;
1388 case EHOSTUNREACH:
1389 code = ICMP_HOST_UNREACH;
1390 break;
1391 case ENETUNREACH:
1392 code = ICMP_NET_UNREACH;
1393 IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
1394 break;
1395 case EACCES:
1396 code = ICMP_PKT_FILTERED;
1397 break;
1398 }
1399
1400 now = jiffies;
1401 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
1402 if (rt->u.dst.rate_tokens > ip_rt_error_burst)
1403 rt->u.dst.rate_tokens = ip_rt_error_burst;
1404 rt->u.dst.rate_last = now;
1405 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
1406 rt->u.dst.rate_tokens -= ip_rt_error_cost;
1407 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1408 }
1409
1410out: kfree_skb(skb);
1411 return 0;
1412}
1413
1414
1415
1416
1417
1418
/*
 * Candidate path-MTU values in descending order; guess_mtu() walks this
 * table to find the next smaller size.
 */
static const unsigned short mtu_plateau[] = {
	32000, 17914, 8166, 4352, 2002, 1492, 576, 296, 216, 128
};

/*
 * Return the largest table entry strictly below @old_mtu, or 68 when
 * @old_mtu is not larger than any entry.
 */
static inline unsigned short guess_mtu(unsigned short old_mtu)
{
	const unsigned short *step = mtu_plateau;
	const unsigned short *end = mtu_plateau + ARRAY_SIZE(mtu_plateau);

	while (step != end) {
		if (*step < old_mtu)
			return *step;
		step++;
	}
	return 68;
}
1431
/*
 * Record a smaller path MTU on cached output routes towards iph->daddr.
 *
 * @net:     only cache entries whose device belongs to this namespace match
 * @iph:     IP header supplying daddr, saddr, tot_len and ihl
 * @new_mtu: proposed MTU; a value below 68 or not below iph->tot_len is
 *           replaced by a guess_mtu() estimate
 * @dev:     its ifindex is one of the two oif keys that are probed
 *
 * Returns 0 when ipv4_config.no_pmtu_disc is set.  Otherwise returns the
 * MTU computed for the last matching entry whose current MTU metric was
 * not already below it, or @new_mtu unchanged when there was no such entry.
 */
unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
				 unsigned short new_mtu,
				 struct net_device *dev)
{
	int i, k;
	unsigned short old_mtu = ntohs(iph->tot_len);
	struct rtable *rth;
	/* Probe the cache with oif = dev->ifindex and oif = 0 ... */
	int ikeys[2] = { dev->ifindex, 0 };
	/* ... and with source key = iph->saddr and source key = 0. */
	__be32 skeys[2] = { iph->saddr, 0, };
	__be32 daddr = iph->daddr;
	unsigned short est_mtu = 0;

	if (ipv4_config.no_pmtu_disc)
		return 0;

	for (k = 0; k < 2; k++) {
		for (i = 0; i < 2; i++) {
			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);

			rcu_read_lock();
			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
			     rth = rcu_dereference(rth->u.dst.rt_next)) {
				unsigned short mtu = new_mtu;

				/*
				 * Only current-generation output entries
				 * (iif == 0) with exactly these keys, in this
				 * namespace, whose MTU metric is not locked.
				 */
				if (rth->fl.fl4_dst != daddr ||
				    rth->fl.fl4_src != skeys[i] ||
				    rth->rt_dst != daddr ||
				    rth->rt_src != iph->saddr ||
				    rth->fl.oif != ikeys[k] ||
				    rth->fl.iif != 0 ||
				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
				    !net_eq(dev_net(rth->u.dst.dev), net) ||
				    rth->rt_genid != atomic_read(&rt_genid))
					continue;

				if (new_mtu < 68 || new_mtu >= old_mtu) {

					/*
					 * NOTE(review): when no MTU was
					 * supplied (mtu == 0) the IP header
					 * length is subtracted from old_mtu,
					 * presumably for senders that count
					 * the header twice -- confirm.
					 * old_mtu lives outside the loops, so
					 * the adjustment carries over to any
					 * later matching entry.
					 */
					if (mtu == 0 &&
					    old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) &&
					    old_mtu >= 68 + (iph->ihl << 2))
						old_mtu -= iph->ihl << 2;

					mtu = guess_mtu(old_mtu);
				}
				if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) {
					if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) {
						dst_confirm(&rth->u.dst);
						/*
						 * Never go below
						 * ip_rt_min_pmtu; clamp and
						 * lock the metric instead.
						 */
						if (mtu < ip_rt_min_pmtu) {
							mtu = ip_rt_min_pmtu;
							rth->u.dst.metrics[RTAX_LOCK-1] |=
								(1 << RTAX_MTU);
						}
						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
						dst_set_expires(&rth->u.dst,
							ip_rt_mtu_expires);
					}
					est_mtu = mtu;
				}
			}
			rcu_read_unlock();
		}
	}
	return est_mtu ? : new_mtu;
}
1497
1498static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1499{
1500 if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 &&
1501 !(dst_metric_locked(dst, RTAX_MTU))) {
1502 if (mtu < ip_rt_min_pmtu) {
1503 mtu = ip_rt_min_pmtu;
1504 dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
1505 }
1506 dst->metrics[RTAX_MTU-1] = mtu;
1507 dst_set_expires(dst, ip_rt_mtu_expires);
1508 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1509 }
1510}
1511
/*
 * dst_ops .check handler for IPv4 routes (see ipv4_dst_blackhole_ops).
 * Ignores @dst and @cookie and always returns NULL.
 * NOTE(review): presumably NULL tells the caller the cached entry must not
 * be reused -- confirm against the users of dst_ops->check.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
1516
1517static void ipv4_dst_destroy(struct dst_entry *dst)
1518{
1519 struct rtable *rt = (struct rtable *) dst;
1520 struct inet_peer *peer = rt->peer;
1521 struct in_device *idev = rt->idev;
1522
1523 if (peer) {
1524 rt->peer = NULL;
1525 inet_putpeer(peer);
1526 }
1527
1528 if (idev) {
1529 rt->idev = NULL;
1530 in_dev_put(idev);
1531 }
1532}
1533
1534static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
1535 int how)
1536{
1537 struct rtable *rt = (struct rtable *) dst;
1538 struct in_device *idev = rt->idev;
1539 if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
1540 struct in_device *loopback_idev =
1541 in_dev_get(dev_net(dev)->loopback_dev);
1542 if (loopback_idev) {
1543 rt->idev = loopback_idev;
1544 in_dev_put(idev);
1545 }
1546 }
1547}
1548
1549static void ipv4_link_failure(struct sk_buff *skb)
1550{
1551 struct rtable *rt;
1552
1553 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1554
1555 rt = skb->rtable;
1556 if (rt)
1557 dst_set_expires(&rt->u.dst, 0);
1558}
1559
1560static int ip_rt_bug(struct sk_buff *skb)
1561{
1562 printk(KERN_DEBUG "ip_rt_bug: " NIPQUAD_FMT " -> " NIPQUAD_FMT ", %s\n",
1563 NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
1564 skb->dev ? skb->dev->name : "?");
1565 kfree_skb(skb);
1566 return 0;
1567}
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578void ip_rt_get_source(u8 *addr, struct rtable *rt)
1579{
1580 __be32 src;
1581 struct fib_result res;
1582
1583 if (rt->fl.iif == 0)
1584 src = rt->rt_src;
1585 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
1586 src = FIB_RES_PREFSRC(res);
1587 fib_res_put(&res);
1588 } else
1589 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
1590 RT_SCOPE_UNIVERSE);
1591 memcpy(addr, &src, 4);
1592}
1593
#ifdef CONFIG_NET_CLS_ROUTE
/*
 * Merge @tag into the route's tclassid.  The low and the high 16 bits are
 * handled independently: each half is taken from @tag only when that half
 * of tclassid is still zero.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	static const u32 halves[] = { 0x0000FFFF, 0xFFFF0000 };
	unsigned int n;

	for (n = 0; n < ARRAY_SIZE(halves); n++) {
		if (!(rt->u.dst.tclassid & halves[n]))
			rt->u.dst.tclassid |= tag & halves[n];
	}
}
#endif
1603
1604static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1605{
1606 struct fib_info *fi = res->fi;
1607
1608 if (fi) {
1609 if (FIB_RES_GW(*res) &&
1610 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1611 rt->rt_gateway = FIB_RES_GW(*res);
1612 memcpy(rt->u.dst.metrics, fi->fib_metrics,
1613 sizeof(rt->u.dst.metrics));
1614 if (fi->fib_mtu == 0) {
1615 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
1616 if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1617 rt->rt_gateway != rt->rt_dst &&
1618 rt->u.dst.dev->mtu > 576)
1619 rt->u.dst.metrics[RTAX_MTU-1] = 576;
1620 }
1621#ifdef CONFIG_NET_CLS_ROUTE
1622 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1623#endif
1624 } else
1625 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
1626
1627 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1628 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1629 if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU)
1630 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1631 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
1632 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
1633 ip_rt_min_advmss);
1634 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
1635 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1636
1637#ifdef CONFIG_NET_CLS_ROUTE
1638#ifdef CONFIG_IP_MULTIPLE_TABLES
1639 set_class_tag(rt, fib_rules_tclass(res));
1640#endif
1641 set_class_tag(rt, itag);
1642#endif
1643 rt->rt_type = res->type;
1644}
1645
1646static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1647 u8 tos, struct net_device *dev, int our)
1648{
1649 unsigned hash;
1650 struct rtable *rth;
1651 __be32 spec_dst;
1652 struct in_device *in_dev = in_dev_get(dev);
1653 u32 itag = 0;
1654
1655
1656
1657 if (in_dev == NULL)
1658 return -EINVAL;
1659
1660 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
1661 ipv4_is_loopback(saddr) || skb->protocol != htons(ETH_P_IP))
1662 goto e_inval;
1663
1664 if (ipv4_is_zeronet(saddr)) {
1665 if (!ipv4_is_local_multicast(daddr))
1666 goto e_inval;
1667 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1668 } else if (fib_validate_source(saddr, 0, tos, 0,
1669 dev, &spec_dst, &itag) < 0)
1670 goto e_inval;
1671
1672 rth = dst_alloc(&ipv4_dst_ops);
1673 if (!rth)
1674 goto e_nobufs;
1675
1676 rth->u.dst.output= ip_rt_bug;
1677
1678 atomic_set(&rth->u.dst.__refcnt, 1);
1679 rth->u.dst.flags= DST_HOST;
1680 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1681 rth->u.dst.flags |= DST_NOPOLICY;
1682 rth->fl.fl4_dst = daddr;
1683 rth->rt_dst = daddr;
1684 rth->fl.fl4_tos = tos;
1685 rth->fl.mark = skb->mark;
1686 rth->fl.fl4_src = saddr;
1687 rth->rt_src = saddr;
1688#ifdef CONFIG_NET_CLS_ROUTE
1689 rth->u.dst.tclassid = itag;
1690#endif
1691 rth->rt_iif =
1692 rth->fl.iif = dev->ifindex;
1693 rth->u.dst.dev = init_net.loopback_dev;
1694 dev_hold(rth->u.dst.dev);
1695 rth->idev = in_dev_get(rth->u.dst.dev);
1696 rth->fl.oif = 0;
1697 rth->rt_gateway = daddr;
1698 rth->rt_spec_dst= spec_dst;
1699 rth->rt_genid = atomic_read(&rt_genid);
1700 rth->rt_flags = RTCF_MULTICAST;
1701 rth->rt_type = RTN_MULTICAST;
1702 if (our) {
1703 rth->u.dst.input= ip_local_deliver;
1704 rth->rt_flags |= RTCF_LOCAL;
1705 }
1706
1707#ifdef CONFIG_IP_MROUTE
1708 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1709 rth->u.dst.input = ip_mr_input;
1710#endif
1711 RT_CACHE_STAT_INC(in_slow_mc);
1712
1713 in_dev_put(in_dev);
1714 hash = rt_hash(daddr, saddr, dev->ifindex);
1715 return rt_intern_hash(hash, rth, &skb->rtable);
1716
1717e_nobufs:
1718 in_dev_put(in_dev);
1719 return -ENOBUFS;
1720
1721e_inval:
1722 in_dev_put(in_dev);
1723 return -EINVAL;
1724}
1725
1726
1727static void ip_handle_martian_source(struct net_device *dev,
1728 struct in_device *in_dev,
1729 struct sk_buff *skb,
1730 __be32 daddr,
1731 __be32 saddr)
1732{
1733 RT_CACHE_STAT_INC(in_martian_src);
1734#ifdef CONFIG_IP_ROUTE_VERBOSE
1735 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1736
1737
1738
1739
1740 printk(KERN_WARNING "martian source " NIPQUAD_FMT " from "
1741 NIPQUAD_FMT", on dev %s\n",
1742 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1743 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
1744 int i;
1745 const unsigned char *p = skb_mac_header(skb);
1746 printk(KERN_WARNING "ll header: ");
1747 for (i = 0; i < dev->hard_header_len; i++, p++) {
1748 printk("%02x", *p);
1749 if (i < (dev->hard_header_len - 1))
1750 printk(":");
1751 }
1752 printk("\n");
1753 }
1754 }
1755#endif
1756}
1757
/*
 * Allocate and fill (but do not hash) a forwarding cache entry for an
 * input packet whose FIB lookup produced @res.
 *
 * On success returns 0 and stores the new entry, holding one reference, in
 * *@result.  Returns -EINVAL when the output device has no in_device, when
 * fib_validate_source() rejects the source, or when a packet whose
 * skb->protocol is not ETH_P_IP would leave through the interface it came
 * in on; returns -ENOBUFS when dst_alloc() fails.
 */
static int __mkroute_input(struct sk_buff *skb,
			   struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos,
			   struct rtable **result)
{

	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned flags = 0;
	__be32 spec_dst;
	u32 itag;

	/* Take a reference on the output device's in_device. */
	out_dev = in_dev_get(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input" \
			       "_slow(). Please, report\n");
		return -EINVAL;
	}


	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, &spec_dst, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		err = -EINVAL;
		goto cleanup;
	}

	/* A positive result from fib_validate_source() marks a direct source. */
	if (err)
		flags |= RTCF_DIRECTSRC;

	/*
	 * Packet would go back out of the interface it arrived on and the
	 * source is direct: request a redirect if the medium is shared or
	 * the source is on-link with the gateway.
	 */
	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/*
		 * NOTE(review): non-IP callers are presumably ARP (proxy-arp
		 * lookups); no route is created when it would point back at
		 * the input interface -- confirm against the callers.
		 */
		if (out_dev == in_dev) {
			err = -EINVAL;
			goto cleanup;
		}
	}


	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
		rth->u.dst.flags |= DST_NOPOLICY;
	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
		rth->u.dst.flags |= DST_NOXFRM;
	rth->fl.fl4_dst = daddr;
	rth->rt_dst = daddr;
	rth->fl.fl4_tos = tos;
	rth->fl.mark = skb->mark;
	rth->fl.fl4_src = saddr;
	rth->rt_src = saddr;
	rth->rt_gateway = daddr;
	rth->rt_iif =
		rth->fl.iif = in_dev->dev->ifindex;
	rth->u.dst.dev = (out_dev)->dev;
	dev_hold(rth->u.dst.dev);
	rth->idev = in_dev_get(rth->u.dst.dev);
	rth->fl.oif = 0;
	rth->rt_spec_dst= spec_dst;

	rth->u.dst.input = ip_forward;
	rth->u.dst.output = ip_output;
	rth->rt_genid = atomic_read(&rt_genid);

	/* May replace rt_gateway and fills metrics, tclassid and rt_type. */
	rt_set_nexthop(rth, res, itag);

	rth->rt_flags = flags;

	*result = rth;
	err = 0;
 cleanup:
	/* Drop the in_device reference taken at the top, on every path. */
	in_dev_put(out_dev);
	return err;
}
1853
1854static int ip_mkroute_input(struct sk_buff *skb,
1855 struct fib_result *res,
1856 const struct flowi *fl,
1857 struct in_device *in_dev,
1858 __be32 daddr, __be32 saddr, u32 tos)
1859{
1860 struct rtable* rth = NULL;
1861 int err;
1862 unsigned hash;
1863
1864#ifdef CONFIG_IP_ROUTE_MULTIPATH
1865 if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1866 fib_select_multipath(fl, res);
1867#endif
1868
1869
1870 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1871 if (err)
1872 return err;
1873
1874
1875 hash = rt_hash(daddr, saddr, fl->iif);
1876 return rt_intern_hash(hash, rth, &skb->rtable);
1877}
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
/*
 * Slow-path input routing, called by ip_route_input() for non-multicast
 * destinations that missed the route cache.
 *
 * Classifies the packet (martian source/destination, broadcast, local,
 * forwarded, unroutable), builds the matching cache entry and inserts it
 * with rt_intern_hash(), which sets skb->rtable.
 *
 * Returns 0 or the rt_intern_hash()/ip_mkroute_input() result, or:
 *   -EINVAL        @dev has no in_device, martian source, or a broadcast
 *                  for a packet that is not ETH_P_IP
 *   -EHOSTUNREACH  martian destination, or the packet is not for this host
 *                  and forwarding is disabled on the input device
 *   -ENOBUFS       no cache entry could be allocated
 * An unroutable packet with forwarding enabled still gets a cache entry;
 * its input handler is ip_error() and dst.error holds the (positive)
 * lookup error.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = in_dev_get(dev);
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = daddr,
					.saddr = saddr,
					.tos = tos,
					.scope = RT_SCOPE_UNIVERSE,
				      } },
			    .mark = skb->mark,
			    .iif = dev->ifindex };
	unsigned flags = 0;
	u32 itag = 0;
	struct rtable * rth;
	unsigned hash;
	__be32 spec_dst;
	int err = -EINVAL;
	int free_res = 0;	/* set once fib_lookup() succeeded and res must be put */
	struct net * net = dev_net(dev);

	/* Without an in_device nothing was acquired yet: return -EINVAL. */
	if (!in_dev)
		goto out;

	/* Multicast, limited-broadcast and loopback sources are martians. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    ipv4_is_loopback(saddr))
		goto martian_source;

	/*
	 * Limited broadcast, or both addresses zero: handled as broadcast
	 * without consulting the FIB (res is not filled in on this path).
	 */
	if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* A zero-net source is accepted only in the broadcast case above. */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
	    ipv4_is_loopback(daddr))
		goto martian_destination;

	/* Route the packet. */
	if ((err = fib_lookup(net, &fl, &res)) != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}
	free_res = 1;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		int result;
		result = fib_validate_source(saddr, daddr, tos,
					     net->loopback_dev->ifindex,
					     dev, &spec_dst, &itag);
		if (result < 0)
			goto martian_source;
		if (result)
			flags |= RTCF_DIRECTSRC;
		/* For local delivery the specific destination is daddr itself. */
		spec_dst = daddr;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto e_hostunreach;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
done:
	/* Common exit: release the in_device and, if held, the FIB result. */
	in_dev_put(in_dev);
	if (free_res)
		fib_res_put(&res);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_zeronet(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

	/* Falls through: broadcasts are delivered locally. */
local_input:
	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
		goto e_nobufs;

	/* Input-only entry: transmitting through it is a bug. */
	rth->u.dst.output= ip_rt_bug;
	rth->rt_genid = atomic_read(&rt_genid);

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
		rth->u.dst.flags |= DST_NOPOLICY;
	rth->fl.fl4_dst = daddr;
	rth->rt_dst = daddr;
	rth->fl.fl4_tos = tos;
	rth->fl.mark = skb->mark;
	rth->fl.fl4_src = saddr;
	rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
	rth->u.dst.tclassid = itag;
#endif
	rth->rt_iif =
	rth->fl.iif = dev->ifindex;
	rth->u.dst.dev = net->loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev = in_dev_get(rth->u.dst.dev);
	rth->rt_gateway = daddr;
	rth->rt_spec_dst= spec_dst;
	rth->u.dst.input= ip_local_deliver;
	rth->rt_flags = flags|RTCF_LOCAL;
	/* Reached from no_route: turn the entry into an error route. */
	if (res.type == RTN_UNREACHABLE) {
		rth->u.dst.input= ip_error;
		rth->u.dst.error= -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}
	rth->rt_type = res.type;
	hash = rt_hash(daddr, saddr, fl.iif);
	err = rt_intern_hash(hash, rth, &skb->rtable);
	goto done;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	res.type = RTN_UNREACHABLE;
	/* err still holds the fib_lookup() error; -ESRCH becomes -ENETUNREACH. */
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/* Do not cache martian addresses: they should be logged (RFC 1812). */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
		printk(KERN_WARNING "martian destination " NIPQUAD_FMT " from "
			NIPQUAD_FMT ", dev %s\n",
			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
#endif

	/* Falls through from martian_destination. */
e_hostunreach:
	err = -EHOSTUNREACH;
	goto done;

e_inval:
	err = -EINVAL;
	goto done;

e_nobufs:
	err = -ENOBUFS;
	goto done;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto e_inval;
}
2071
2072int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2073 u8 tos, struct net_device *dev)
2074{
2075 struct rtable * rth;
2076 unsigned hash;
2077 int iif = dev->ifindex;
2078 struct net *net;
2079
2080 net = dev_net(dev);
2081 tos &= IPTOS_RT_MASK;
2082 hash = rt_hash(daddr, saddr, iif);
2083
2084 rcu_read_lock();
2085 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2086 rth = rcu_dereference(rth->u.dst.rt_next)) {
2087 if (((rth->fl.fl4_dst ^ daddr) |
2088 (rth->fl.fl4_src ^ saddr) |
2089 (rth->fl.iif ^ iif) |
2090 rth->fl.oif |
2091 (rth->fl.fl4_tos ^ tos)) == 0 &&
2092 rth->fl.mark == skb->mark &&
2093 net_eq(dev_net(rth->u.dst.dev), net) &&
2094 rth->rt_genid == atomic_read(&rt_genid)) {
2095 dst_use(&rth->u.dst, jiffies);
2096 RT_CACHE_STAT_INC(in_hit);
2097 rcu_read_unlock();
2098 skb->rtable = rth;
2099 return 0;
2100 }
2101 RT_CACHE_STAT_INC(in_hlist_search);
2102 }
2103 rcu_read_unlock();
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116 if (ipv4_is_multicast(daddr)) {
2117 struct in_device *in_dev;
2118
2119 rcu_read_lock();
2120 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2121 int our = ip_check_mc(in_dev, daddr, saddr,
2122 ip_hdr(skb)->protocol);
2123 if (our
2124#ifdef CONFIG_IP_MROUTE
2125 || (!ipv4_is_local_multicast(daddr) &&
2126 IN_DEV_MFORWARD(in_dev))
2127#endif
2128 ) {
2129 rcu_read_unlock();
2130 return ip_route_input_mc(skb, daddr, saddr,
2131 tos, dev, our);
2132 }
2133 }
2134 rcu_read_unlock();
2135 return -EINVAL;
2136 }
2137 return ip_route_input_slow(skb, daddr, saddr, tos, dev);
2138}
2139
/*
 * Allocate and fill (but do not hash) an output route cache entry.
 *
 * @result:  receives the new entry, holding one reference, on success
 * @res:     FIB lookup result; res->type is overwritten for broadcast and
 *           multicast destinations and res->fi may be dropped
 * @fl:      resolved flow; supplies rt_dst, rt_src, rt_gateway, rt_spec_dst
 * @oldflp:  the caller's original flow; its fields become the cache key
 * @dev_out: output device
 * @flags:   initial RTCF_* flags
 *
 * Returns 0 on success, -EINVAL for a loopback source on a non-loopback
 * device, a limited-broadcast-class or zero-net destination, or a device
 * without in_device, and -ENOBUFS when dst_alloc() fails.
 */
static int __mkroute_output(struct rtable **result,
			    struct fib_result *res,
			    const struct flowi *fl,
			    const struct flowi *oldflp,
			    struct net_device *dev_out,
			    unsigned flags)
{
	struct rtable *rth;
	struct in_device *in_dev;
	u32 tos = RT_FL_TOS(oldflp);
	int err = 0;

	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
		return -EINVAL;

	/* The destination address overrides the FIB type for bcast/mcast. */
	if (fl->fl4_dst == htonl(0xFFFFFFFF))
		res->type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl->fl4_dst))
		res->type = RTN_MULTICAST;
	else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
		return -EINVAL;

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	/* Take a reference on the in_device; released at cleanup. */
	in_dev = in_dev_get(dev_out);
	if (!in_dev)
		return -EINVAL;

	if (res->type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		if (res->fi) {
			fib_info_put(res->fi);
			res->fi = NULL;
		}
	} else if (res->type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST|RTCF_LOCAL;
		/* Not local unless the group is joined on this device. */
		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
				 oldflp->proto))
			flags &= ~RTCF_LOCAL;
		/*
		 * NOTE(review): FIB info from a prefix shorter than 4 bits is
		 * dropped, presumably so that a default route does not supply
		 * a gateway for multicast -- confirm.
		 */
		if (res->fi && res->prefixlen < 4) {
			fib_info_put(res->fi);
			res->fi = NULL;
		}
	}


	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
		rth->u.dst.flags |= DST_NOXFRM;
	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
		rth->u.dst.flags |= DST_NOPOLICY;

	/* Cache key: taken from the caller's original flow. */
	rth->fl.fl4_dst = oldflp->fl4_dst;
	rth->fl.fl4_tos = tos;
	rth->fl.fl4_src = oldflp->fl4_src;
	rth->fl.oif = oldflp->oif;
	rth->fl.mark = oldflp->mark;
	/* Resolved addresses: taken from the flow after selection. */
	rth->rt_dst = fl->fl4_dst;
	rth->rt_src = fl->fl4_src;
	rth->rt_iif = oldflp->oif ? : dev_out->ifindex;

	/* The entry holds its own device and in_device references. */
	rth->u.dst.dev = dev_out;
	dev_hold(dev_out);
	rth->idev = in_dev_get(dev_out);
	rth->rt_gateway = fl->fl4_dst;
	rth->rt_spec_dst= fl->fl4_src;

	rth->u.dst.output=ip_output;
	rth->rt_genid = atomic_read(&rt_genid);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL) {
		rth->u.dst.input = ip_local_deliver;
		rth->rt_spec_dst = fl->fl4_dst;
	}
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		rth->rt_spec_dst = fl->fl4_src;
		/* Locally relevant and not on loopback: use ip_mc_output. */
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->u.dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (res->type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
				rth->u.dst.input = ip_mr_input;
				rth->u.dst.output = ip_mc_output;
			}
		}
#endif
	}

	/* May replace rt_gateway and fills metrics, tclassid and rt_type. */
	rt_set_nexthop(rth, res, 0);

	rth->rt_flags = flags;

	*result = rth;
 cleanup:
	/* Drop the in_device reference taken above. */
	in_dev_put(in_dev);

	return err;
}
2259
2260static int ip_mkroute_output(struct rtable **rp,
2261 struct fib_result *res,
2262 const struct flowi *fl,
2263 const struct flowi *oldflp,
2264 struct net_device *dev_out,
2265 unsigned flags)
2266{
2267 struct rtable *rth = NULL;
2268 int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
2269 unsigned hash;
2270 if (err == 0) {
2271 hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
2272 err = rt_intern_hash(hash, rth, rp);
2273 }
2274
2275 return err;
2276}
2277
2278
2279
2280
2281
/*
 * Slow-path output routing, called by __ip_route_output_key() on a cache
 * miss.
 *
 * Resolves the caller's flow @oldflp into an output device and a complete
 * source/destination pair, then builds and caches the entry through
 * ip_mkroute_output(), which stores it in *@rp.
 *
 * Returns the ip_mkroute_output() result, or:
 *   -EINVAL       the given source is multicast, limited broadcast or
 *                 zero-net, or no device is found for it by ip_dev_find()
 *   -ENODEV       oldflp->oif names no device, or one without in_device
 *   -ENETUNREACH  the FIB lookup failed and no output interface was given
 *
 * dev_out carries a device reference whenever it is non-NULL; each
 * reassignment below puts the old reference first.
 */
static int ip_route_output_slow(struct net *net, struct rtable **rp,
				const struct flowi *oldflp)
{
	u32 tos = RT_FL_TOS(oldflp);
	/* Working copy of the flow; scope is link when RTO_ONLINK is set. */
	struct flowi fl = { .nl_u = { .ip4_u =
				      { .daddr = oldflp->fl4_dst,
					.saddr = oldflp->fl4_src,
					.tos = tos & IPTOS_RT_MASK,
					.scope = ((tos & RTO_ONLINK) ?
						  RT_SCOPE_LINK :
						  RT_SCOPE_UNIVERSE),
				      } },
			    .mark = oldflp->mark,
			    .iif = net->loopback_dev->ifindex,
			    .oif = oldflp->oif };
	struct fib_result res;
	unsigned flags = 0;
	struct net_device *dev_out = NULL;
	int free_res = 0;	/* set once fib_lookup() succeeded */
	int err;


	res.fi = NULL;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	if (oldflp->fl4_src) {
		err = -EINVAL;
		if (ipv4_is_multicast(oldflp->fl4_src) ||
		    ipv4_is_lbcast(oldflp->fl4_src) ||
		    ipv4_is_zeronet(oldflp->fl4_src))
			goto out;

		/* The source must be found on a device by ip_dev_find(). */
		dev_out = ip_dev_find(net, oldflp->fl4_src);
		if (dev_out == NULL)
			goto out;

		/*
		 * Multicast or limited broadcast with a source but no output
		 * interface: send through the device that owns the source
		 * address.  res.type is not set here; __mkroute_output()
		 * assigns it for these destinations.
		 */
		if (oldflp->oif == 0
		    && (ipv4_is_multicast(oldflp->fl4_dst) ||
			oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
			fl.oif = dev_out->ifindex;
			goto make_route;
		}
		/* Otherwise the device was only needed to validate the source. */
		if (dev_out)
			dev_put(dev_out);
		dev_out = NULL;
	}


	if (oldflp->oif) {
		dev_out = dev_get_by_index(net, oldflp->oif);
		err = -ENODEV;
		if (dev_out == NULL)
			goto out;

		/*
		 * NOTE(review): the _rtnl accessor is used here; whether RTNL
		 * is held on this path cannot be seen from this function --
		 * confirm.
		 */
		if (__in_dev_get_rtnl(dev_out) == NULL) {
			dev_put(dev_out);
			goto out;
		}

		/* Link-local multicast / broadcast: no FIB lookup needed. */
		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
		    oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
			if (!fl.fl4_src)
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			goto make_route;
		}
		if (!fl.fl4_src) {
			if (ipv4_is_multicast(oldflp->fl4_dst))
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      fl.fl4_scope);
			else if (!oldflp->fl4_dst)
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      RT_SCOPE_HOST);
		}
	}

	/*
	 * No destination: talk to ourselves.  Destination becomes the
	 * source, or loopback for both, and the route goes via loopback.
	 */
	if (!fl.fl4_dst) {
		fl.fl4_dst = fl.fl4_src;
		if (!fl.fl4_dst)
			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
		if (dev_out)
			dev_put(dev_out);
		dev_out = net->loopback_dev;
		dev_hold(dev_out);
		fl.oif = net->loopback_dev->ifindex;
		res.type = RTN_LOCAL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

	if (fib_lookup(net, &fl, &res)) {
		res.fi = NULL;
		if (oldflp->oif) {
			/*
			 * No route, but the caller named an interface: treat
			 * the destination as directly reachable on it.
			 */
			if (fl.fl4_src == 0)
				fl.fl4_src = inet_select_addr(dev_out, 0,
							      RT_SCOPE_LINK);
			res.type = RTN_UNICAST;
			goto make_route;
		}
		if (dev_out)
			dev_put(dev_out);
		err = -ENETUNREACH;
		goto out;
	}
	free_res = 1;

	/* Destination is a local address: route via loopback. */
	if (res.type == RTN_LOCAL) {
		if (!fl.fl4_src)
			fl.fl4_src = fl.fl4_dst;
		if (dev_out)
			dev_put(dev_out);
		dev_out = net->loopback_dev;
		dev_hold(dev_out);
		fl.oif = dev_out->ifindex;
		if (res.fi)
			fib_info_put(res.fi);
		res.fi = NULL;
		flags |= RTCF_LOCAL;
		goto make_route;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res.fi->fib_nhs > 1 && fl.oif == 0)
		fib_select_multipath(&fl, &res);
	else
#endif
	/* Default route (prefix length 0) without oif: pick among defaults. */
	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
		fib_select_default(net, &fl, &res);

	if (!fl.fl4_src)
		fl.fl4_src = FIB_RES_PREFSRC(res);

	/* Switch to the device chosen by the FIB. */
	if (dev_out)
		dev_put(dev_out);
	dev_out = FIB_RES_DEV(res);
	dev_hold(dev_out);
	fl.oif = dev_out->ifindex;


make_route:
	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);


	if (free_res)
		fib_res_put(&res);
	if (dev_out)
		dev_put(dev_out);
out:	return err;
}
2476
2477int __ip_route_output_key(struct net *net, struct rtable **rp,
2478 const struct flowi *flp)
2479{
2480 unsigned hash;
2481 struct rtable *rth;
2482
2483 hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
2484
2485 rcu_read_lock_bh();
2486 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2487 rth = rcu_dereference(rth->u.dst.rt_next)) {
2488 if (rth->fl.fl4_dst == flp->fl4_dst &&
2489 rth->fl.fl4_src == flp->fl4_src &&
2490 rth->fl.iif == 0 &&
2491 rth->fl.oif == flp->oif &&
2492 rth->fl.mark == flp->mark &&
2493 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2494 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2495 net_eq(dev_net(rth->u.dst.dev), net) &&
2496 rth->rt_genid == atomic_read(&rt_genid)) {
2497 dst_use(&rth->u.dst, jiffies);
2498 RT_CACHE_STAT_INC(out_hit);
2499 rcu_read_unlock_bh();
2500 *rp = rth;
2501 return 0;
2502 }
2503 RT_CACHE_STAT_INC(out_hlist_search);
2504 }
2505 rcu_read_unlock_bh();
2506
2507 return ip_route_output_slow(net, rp, flp);
2508}
2509
2510EXPORT_SYMBOL_GPL(__ip_route_output_key);
2511
/*
 * .update_pmtu handler of ipv4_dst_blackhole_ops: intentionally empty, so
 * path-MTU updates on a blackhole route are ignored.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
2515
/*
 * dst_ops used for the blackhole routes allocated by ipv4_dst_blackhole().
 * Entries are sized as struct rtable and destroyed with
 * ipv4_dst_destroy(); .check always returns NULL and .update_pmtu does
 * nothing.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family = AF_INET,
	.protocol = __constant_htons(ETH_P_IP),
	.destroy = ipv4_dst_destroy,
	.check = ipv4_dst_check,
	.update_pmtu = ipv4_rt_blackhole_update_pmtu,
	.entry_size = sizeof(struct rtable),
	.entries = ATOMIC_INIT(0),	/* no entries allocated initially */
};
2525
2526
2527static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
2528{
2529 struct rtable *ort = *rp;
2530 struct rtable *rt = (struct rtable *)
2531 dst_alloc(&ipv4_dst_blackhole_ops);
2532
2533 if (rt) {
2534 struct dst_entry *new = &rt->u.dst;
2535
2536 atomic_set(&new->__refcnt, 1);
2537 new->__use = 1;
2538 new->input = dst_discard;
2539 new->output = dst_discard;
2540 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
2541
2542 new->dev = ort->u.dst.dev;
2543 if (new->dev)
2544 dev_hold(new->dev);
2545
2546 rt->fl = ort->fl;
2547
2548 rt->idev = ort->idev;
2549 if (rt->idev)
2550 in_dev_hold(rt->idev);
2551 rt->rt_genid = atomic_read(&rt_genid);
2552 rt->rt_flags = ort->rt_flags;
2553 rt->rt_type = ort->rt_type;
2554 rt->rt_dst = ort->rt_dst;
2555 rt->rt_src = ort->rt_src;
2556 rt->rt_iif = ort->rt_iif;
2557 rt->rt_gateway = ort->rt_gateway;
2558 rt->rt_spec_dst = ort->rt_spec_dst;
2559 rt->peer = ort->peer;
2560 if (rt->peer)
2561 atomic_inc(&rt->peer->refcnt);
2562
2563 dst_free(new);
2564 }
2565
2566 dst_release(&(*rp)->u.dst);
2567 *rp = rt;
2568 return (rt ? 0 : -ENOMEM);
2569}
2570
2571int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2572 struct sock *sk, int flags)
2573{
2574 int err;
2575
2576 if ((err = __ip_route_output_key(net, rp, flp)) != 0)
2577 return err;
2578
2579 if (flp->proto) {
2580 if (!flp->fl4_src)
2581 flp->fl4_src = (*rp)->rt_src;
2582 if (!flp->fl4_dst)
2583 flp->fl4_dst = (*rp)->rt_dst;
2584 err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
2585 flags ? XFRM_LOOKUP_WAIT : 0);
2586 if (err == -EREMOTE)
2587 err = ipv4_dst_blackhole(rp, flp);
2588
2589 return err;
2590 }
2591
2592 return 0;
2593}
2594
2595EXPORT_SYMBOL_GPL(ip_route_output_flow);
2596
/*
 * Convenience wrapper: ip_route_output_flow() with no socket and flags 0,
 * i.e. the xfrm lookup is done without XFRM_LOOKUP_WAIT.
 */
int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
{
	return ip_route_output_flow(net, rp, flp, NULL, 0);
}
2601
/*
 * Append a route netlink message describing skb->rtable to @skb.
 *
 * @skb:    buffer that both carries the route (skb->rtable) and receives
 *          the message
 * @pid:    netlink pid for the message header
 * @seq:    netlink sequence number for the message header
 * @event:  message type passed to nlmsg_put()
 * @nowait: passed on to ipmr_get_route(); when set, an ipmr error other
 *          than -EMSGSIZE is reported in the cache info instead of
 *          aborting
 * @flags:  netlink message flags
 *
 * Returns the nlmsg_end() result on success, 0 when ipmr_get_route()
 * returned 0 in the !nowait case, and -EMSGSIZE when the message does not
 * fit (the partial message is cancelled first).
 *
 * The NLA_PUT* macros are expected to jump to nla_put_failure when the
 * attribute does not fit (see net/netlink.h) -- confirm for this tree.
 */
static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
			int nowait, unsigned int flags)
{
	struct rtable *rt = skb->rtable;
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	long expires;
	u32 id = 0, ts = 0, tsage = 0, error;

	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family = AF_INET;
	r->rtm_dst_len = 32;
	r->rtm_src_len = 0;
	r->rtm_tos = rt->fl.fl4_tos;
	r->rtm_table = RT_TABLE_MAIN;
	NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN);
	r->rtm_type = rt->rt_type;
	r->rtm_scope = RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	/* Only the upper 16 bits of rt_flags are exported, plus CLONED. */
	r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	NLA_PUT_BE32(skb, RTA_DST, rt->rt_dst);

	if (rt->fl.fl4_src) {
		r->rtm_src_len = 32;
		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
	}
	if (rt->u.dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
#ifdef CONFIG_NET_CLS_ROUTE
	if (rt->u.dst.tclassid)
		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
#endif
	/*
	 * Preferred source: rt_spec_dst for input routes; for output routes
	 * rt_src, but only if it differs from the flow's source key.
	 */
	if (rt->fl.iif)
		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
	else if (rt->rt_src != rt->fl.fl4_src)
		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);

	if (rt->rt_dst != rt->rt_gateway)
		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);

	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto nla_put_failure;

	error = rt->u.dst.error;
	/* Relative expiry in jiffies; 0 means no expiry is set. */
	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
	if (rt->peer) {
		id = rt->peer->ip_id_count;
		if (rt->peer->tcp_ts_stamp) {
			ts = rt->peer->tcp_ts;
			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
		}
	}

	if (rt->fl.iif) {
#ifdef CONFIG_IP_MROUTE
		__be32 dst = rt->rt_dst;

		/*
		 * Non-link-local multicast with multicast forwarding enabled:
		 * let ipmr_get_route() describe the route instead of putting
		 * RTA_IIF.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
			int err = ipmr_get_route(skb, r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
	}

	if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
			       expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2695
2696static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2697{
2698 struct net *net = sock_net(in_skb->sk);
2699 struct rtmsg *rtm;
2700 struct nlattr *tb[RTA_MAX+1];
2701 struct rtable *rt = NULL;
2702 __be32 dst = 0;
2703 __be32 src = 0;
2704 u32 iif;
2705 int err;
2706 struct sk_buff *skb;
2707
2708 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2709 if (err < 0)
2710 goto errout;
2711
2712 rtm = nlmsg_data(nlh);
2713
2714 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2715 if (skb == NULL) {
2716 err = -ENOBUFS;
2717 goto errout;
2718 }
2719
2720
2721
2722
2723 skb_reset_mac_header(skb);
2724 skb_reset_network_header(skb);
2725
2726
2727 ip_hdr(skb)->protocol = IPPROTO_ICMP;
2728 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2729
2730 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2731 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
2732 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2733
2734 if (iif) {
2735 struct net_device *dev;
2736
2737 dev = __dev_get_by_index(net, iif);
2738 if (dev == NULL) {
2739 err = -ENODEV;
2740 goto errout_free;
2741 }
2742
2743 skb->protocol = htons(ETH_P_IP);
2744 skb->dev = dev;
2745 local_bh_disable();
2746 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2747 local_bh_enable();
2748
2749 rt = skb->rtable;
2750 if (err == 0 && rt->u.dst.error)
2751 err = -rt->u.dst.error;
2752 } else {
2753 struct flowi fl = {
2754 .nl_u = {
2755 .ip4_u = {
2756 .daddr = dst,
2757 .saddr = src,
2758 .tos = rtm->rtm_tos,
2759 },
2760 },
2761 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2762 };
2763 err = ip_route_output_key(net, &rt, &fl);
2764 }
2765
2766 if (err)
2767 goto errout_free;
2768
2769 skb->rtable = rt;
2770 if (rtm->rtm_flags & RTM_F_NOTIFY)
2771 rt->rt_flags |= RTCF_NOTIFY;
2772
2773 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2774 RTM_NEWROUTE, 0, 0);
2775 if (err <= 0)
2776 goto errout_free;
2777
2778 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2779errout:
2780 return err;
2781
2782errout_free:
2783 kfree_skb(skb);
2784 goto errout;
2785}
2786
2787int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2788{
2789 struct rtable *rt;
2790 int h, s_h;
2791 int idx, s_idx;
2792 struct net *net;
2793
2794 net = sock_net(skb->sk);
2795
2796 s_h = cb->args[0];
2797 if (s_h < 0)
2798 s_h = 0;
2799 s_idx = idx = cb->args[1];
2800 for (h = s_h; h <= rt_hash_mask; h++) {
2801 rcu_read_lock_bh();
2802 for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt;
2803 rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
2804 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
2805 continue;
2806 if (rt->rt_genid != atomic_read(&rt_genid))
2807 continue;
2808 skb->dst = dst_clone(&rt->u.dst);
2809 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
2810 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2811 1, NLM_F_MULTI) <= 0) {
2812 dst_release(xchg(&skb->dst, NULL));
2813 rcu_read_unlock_bh();
2814 goto done;
2815 }
2816 dst_release(xchg(&skb->dst, NULL));
2817 }
2818 rcu_read_unlock_bh();
2819 s_idx = 0;
2820 }
2821
2822done:
2823 cb->args[0] = h;
2824 cb->args[1] = idx;
2825 return skb->len;
2826}
2827
/*
 * Multicast event hook: flushes the route cache by calling
 * rt_cache_flush() with a delay argument of 0.  The in_dev argument is
 * not used.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(0);
}
2832
2833#ifdef CONFIG_SYSCTL
2834static int flush_delay;
2835
2836static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
2837 struct file *filp, void __user *buffer,
2838 size_t *lenp, loff_t *ppos)
2839{
2840 if (write) {
2841 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2842 rt_cache_flush(flush_delay);
2843 return 0;
2844 }
2845
2846 return -EINVAL;
2847}
2848
2849static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
2850 int __user *name,
2851 int nlen,
2852 void __user *oldval,
2853 size_t __user *oldlenp,
2854 void __user *newval,
2855 size_t newlen)
2856{
2857 int delay;
2858 if (newlen != sizeof(int))
2859 return -EINVAL;
2860 if (get_user(delay, (int __user *)newval))
2861 return -EFAULT;
2862 rt_cache_flush(delay);
2863 return 0;
2864}
2865
2866ctl_table ipv4_route_table[] = {
2867 {
2868 .ctl_name = NET_IPV4_ROUTE_FLUSH,
2869 .procname = "flush",
2870 .data = &flush_delay,
2871 .maxlen = sizeof(int),
2872 .mode = 0200,
2873 .proc_handler = &ipv4_sysctl_rtcache_flush,
2874 .strategy = &ipv4_sysctl_rtcache_flush_strategy,
2875 },
2876 {
2877 .ctl_name = NET_IPV4_ROUTE_GC_THRESH,
2878 .procname = "gc_thresh",
2879 .data = &ipv4_dst_ops.gc_thresh,
2880 .maxlen = sizeof(int),
2881 .mode = 0644,
2882 .proc_handler = &proc_dointvec,
2883 },
2884 {
2885 .ctl_name = NET_IPV4_ROUTE_MAX_SIZE,
2886 .procname = "max_size",
2887 .data = &ip_rt_max_size,
2888 .maxlen = sizeof(int),
2889 .mode = 0644,
2890 .proc_handler = &proc_dointvec,
2891 },
2892 {
2893
2894
2895 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL,
2896 .procname = "gc_min_interval",
2897 .data = &ip_rt_gc_min_interval,
2898 .maxlen = sizeof(int),
2899 .mode = 0644,
2900 .proc_handler = &proc_dointvec_jiffies,
2901 .strategy = &sysctl_jiffies,
2902 },
2903 {
2904 .ctl_name = NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS,
2905 .procname = "gc_min_interval_ms",
2906 .data = &ip_rt_gc_min_interval,
2907 .maxlen = sizeof(int),
2908 .mode = 0644,
2909 .proc_handler = &proc_dointvec_ms_jiffies,
2910 .strategy = &sysctl_ms_jiffies,
2911 },
2912 {
2913 .ctl_name = NET_IPV4_ROUTE_GC_TIMEOUT,
2914 .procname = "gc_timeout",
2915 .data = &ip_rt_gc_timeout,
2916 .maxlen = sizeof(int),
2917 .mode = 0644,
2918 .proc_handler = &proc_dointvec_jiffies,
2919 .strategy = &sysctl_jiffies,
2920 },
2921 {
2922 .ctl_name = NET_IPV4_ROUTE_GC_INTERVAL,
2923 .procname = "gc_interval",
2924 .data = &ip_rt_gc_interval,
2925 .maxlen = sizeof(int),
2926 .mode = 0644,
2927 .proc_handler = &proc_dointvec_jiffies,
2928 .strategy = &sysctl_jiffies,
2929 },
2930 {
2931 .ctl_name = NET_IPV4_ROUTE_REDIRECT_LOAD,
2932 .procname = "redirect_load",
2933 .data = &ip_rt_redirect_load,
2934 .maxlen = sizeof(int),
2935 .mode = 0644,
2936 .proc_handler = &proc_dointvec,
2937 },
2938 {
2939 .ctl_name = NET_IPV4_ROUTE_REDIRECT_NUMBER,
2940 .procname = "redirect_number",
2941 .data = &ip_rt_redirect_number,
2942 .maxlen = sizeof(int),
2943 .mode = 0644,
2944 .proc_handler = &proc_dointvec,
2945 },
2946 {
2947 .ctl_name = NET_IPV4_ROUTE_REDIRECT_SILENCE,
2948 .procname = "redirect_silence",
2949 .data = &ip_rt_redirect_silence,
2950 .maxlen = sizeof(int),
2951 .mode = 0644,
2952 .proc_handler = &proc_dointvec,
2953 },
2954 {
2955 .ctl_name = NET_IPV4_ROUTE_ERROR_COST,
2956 .procname = "error_cost",
2957 .data = &ip_rt_error_cost,
2958 .maxlen = sizeof(int),
2959 .mode = 0644,
2960 .proc_handler = &proc_dointvec,
2961 },
2962 {
2963 .ctl_name = NET_IPV4_ROUTE_ERROR_BURST,
2964 .procname = "error_burst",
2965 .data = &ip_rt_error_burst,
2966 .maxlen = sizeof(int),
2967 .mode = 0644,
2968 .proc_handler = &proc_dointvec,
2969 },
2970 {
2971 .ctl_name = NET_IPV4_ROUTE_GC_ELASTICITY,
2972 .procname = "gc_elasticity",
2973 .data = &ip_rt_gc_elasticity,
2974 .maxlen = sizeof(int),
2975 .mode = 0644,
2976 .proc_handler = &proc_dointvec,
2977 },
2978 {
2979 .ctl_name = NET_IPV4_ROUTE_MTU_EXPIRES,
2980 .procname = "mtu_expires",
2981 .data = &ip_rt_mtu_expires,
2982 .maxlen = sizeof(int),
2983 .mode = 0644,
2984 .proc_handler = &proc_dointvec_jiffies,
2985 .strategy = &sysctl_jiffies,
2986 },
2987 {
2988 .ctl_name = NET_IPV4_ROUTE_MIN_PMTU,
2989 .procname = "min_pmtu",
2990 .data = &ip_rt_min_pmtu,
2991 .maxlen = sizeof(int),
2992 .mode = 0644,
2993 .proc_handler = &proc_dointvec,
2994 },
2995 {
2996 .ctl_name = NET_IPV4_ROUTE_MIN_ADVMSS,
2997 .procname = "min_adv_mss",
2998 .data = &ip_rt_min_advmss,
2999 .maxlen = sizeof(int),
3000 .mode = 0644,
3001 .proc_handler = &proc_dointvec,
3002 },
3003 {
3004 .ctl_name = NET_IPV4_ROUTE_SECRET_INTERVAL,
3005 .procname = "secret_interval",
3006 .data = &ip_rt_secret_interval,
3007 .maxlen = sizeof(int),
3008 .mode = 0644,
3009 .proc_handler = &proc_dointvec_jiffies,
3010 .strategy = &sysctl_jiffies,
3011 },
3012 { .ctl_name = 0 }
3013};
3014#endif
3015
#ifdef CONFIG_NET_CLS_ROUTE
/*
 * Per-cpu accounting area, only built with CONFIG_NET_CLS_ROUTE.
 * ip_rt_init() allocates it with room for 256 struct ip_rt_acct
 * entries and panics if the allocation fails.
 */
struct ip_rt_acct *ip_rt_acct __read_mostly;
#endif
3019
3020static __initdata unsigned long rhash_entries;
3021static int __init set_rhash_entries(char *str)
3022{
3023 if (!str)
3024 return 0;
3025 rhash_entries = simple_strtoul(str, &str, 0);
3026 return 1;
3027}
3028__setup("rhash_entries=", set_rhash_entries);
3029
3030int __init ip_rt_init(void)
3031{
3032 int rc = 0;
3033
3034 atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
3035 (jiffies ^ (jiffies >> 7))));
3036
3037#ifdef CONFIG_NET_CLS_ROUTE
3038 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
3039 if (!ip_rt_acct)
3040 panic("IP: failed to allocate ip_rt_acct\n");
3041#endif
3042
3043 ipv4_dst_ops.kmem_cachep =
3044 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
3045 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
3046
3047 ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
3048
3049 rt_hash_table = (struct rt_hash_bucket *)
3050 alloc_large_system_hash("IP route cache",
3051 sizeof(struct rt_hash_bucket),
3052 rhash_entries,
3053 (num_physpages >= 128 * 1024) ?
3054 15 : 17,
3055 0,
3056 &rt_hash_log,
3057 &rt_hash_mask,
3058 0);
3059 memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
3060 rt_hash_lock_init();
3061
3062 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
3063 ip_rt_max_size = (rt_hash_mask + 1) * 16;
3064
3065 devinet_init();
3066 ip_fib_init();
3067
3068 rt_secret_timer.function = rt_secret_rebuild;
3069 rt_secret_timer.data = 0;
3070 init_timer_deferrable(&rt_secret_timer);
3071
3072
3073
3074
3075 schedule_delayed_work(&expires_work,
3076 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3077
3078 rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
3079 ip_rt_secret_interval;
3080 add_timer(&rt_secret_timer);
3081
3082 if (ip_rt_proc_init())
3083 printk(KERN_ERR "Unable to create route proc files\n");
3084#ifdef CONFIG_XFRM
3085 xfrm_init();
3086 xfrm4_init();
3087#endif
3088 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
3089
3090 return rc;
3091}
3092
/* Symbols from this file exported via EXPORT_SYMBOL. */
EXPORT_SYMBOL(__ip_select_ident);
EXPORT_SYMBOL(ip_route_input);
EXPORT_SYMBOL(ip_route_output_key);
3096