1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h>
30#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
40#include <linux/mroute6.h>
41#include <linux/init.h>
42#include <linux/if_arp.h>
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#include <linux/nsproxy.h>
46#include <linux/slab.h>
47#include <net/net_namespace.h>
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
58#include <net/netevent.h>
59#include <net/netlink.h>
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71static unsigned int ip6_mtu(const struct dst_entry *dst);
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76static int ip6_dst_gc(struct dst_ops *ops);
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
85
86#ifdef CONFIG_IPV6_ROUTE_INFO
87static struct rt6_info *rt6_add_route_info(struct net *net,
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
90 unsigned int pref);
91static struct rt6_info *rt6_get_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
94#endif
95
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
105 peer = rt6_get_peer_create(rt);
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
132 if (!ipv6_addr_any(p))
133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
136 return daddr;
137}
138
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
142{
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
146 daddr = choose_neigh_daddr(rt, skb, daddr);
147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154{
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
161 rt->n = n;
162
163 return 0;
164}
165
166static struct dst_ops ip6_dst_ops_template = {
167 .family = AF_INET6,
168 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
172 .default_advmss = ip6_default_advmss,
173 .mtu = ip6_mtu,
174 .cow_metrics = ipv6_cow_metrics,
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
180 .redirect = rt6_do_redirect,
181 .local_out = __ip6_local_out,
182 .neigh_lookup = ip6_neigh_lookup,
183};
184
185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186{
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
190}
191
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
194{
195}
196
/* dst_ops->redirect hook for blackhole routes: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
201
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
210 .protocol = cpu_to_be16(ETH_P_IPV6),
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
213 .mtu = ip6_blackhole_mtu,
214 .default_advmss = ip6_default_advmss,
215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
216 .redirect = ip6_rt_blackhole_redirect,
217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
218 .neigh_lookup = ip6_neigh_lookup,
219};
220
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 0,
223};
224
225static const struct rt6_info ip6_null_entry_template = {
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = DST_OBSOLETE_FORCE_CHK,
230 .error = -ENETUNREACH,
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
235 .rt6i_protocol = RTPROT_KERNEL,
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242static int ip6_pkt_prohibit(struct sk_buff *skb);
243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245static const struct rt6_info ip6_prohibit_entry_template = {
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = DST_OBSOLETE_FORCE_CHK,
250 .error = -EACCES,
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
255 .rt6i_protocol = RTPROT_KERNEL,
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258};
259
260static const struct rt6_info ip6_blk_hole_entry_template = {
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
264 .obsolete = DST_OBSOLETE_FORCE_CHK,
265 .error = -EINVAL,
266 .input = dst_discard,
267 .output = dst_discard,
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
270 .rt6i_protocol = RTPROT_KERNEL,
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273};
274
275#endif
276
277
278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 struct net_device *dev,
280 int flags,
281 struct fib6_table *table)
282{
283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_FORCE_CHK, flags);
285
286 if (rt) {
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 rt->rt6i_genid = rt_genid(net);
292 }
293 return rt;
294}
295
296static void ip6_dst_destroy(struct dst_entry *dst)
297{
298 struct rt6_info *rt = (struct rt6_info *)dst;
299 struct inet6_dev *idev = rt->rt6i_idev;
300
301 if (rt->n)
302 neigh_release(rt->n);
303
304 if (!(rt->dst.flags & DST_HOST))
305 dst_destroy_metrics_generic(dst);
306
307 if (idev) {
308 rt->rt6i_idev = NULL;
309 in6_dev_put(idev);
310 }
311
312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 dst_release(dst->from);
314
315 if (rt6_has_peer(rt)) {
316 struct inet_peer *peer = rt6_peer_ptr(rt);
317 inet_putpeer(peer);
318 }
319}
320
321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323static u32 rt6_peer_genid(void)
324{
325 return atomic_read(&__rt6_peer_genid);
326}
327
328void rt6_bind_peer(struct rt6_info *rt, int create)
329{
330 struct inet_peer_base *base;
331 struct inet_peer *peer;
332
333 base = inetpeer_base_ptr(rt->_rt6i_peer);
334 if (!base)
335 return;
336
337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338 if (peer) {
339 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 }
344}
345
346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 int how)
348{
349 struct rt6_info *rt = (struct rt6_info *)dst;
350 struct inet6_dev *idev = rt->rt6i_idev;
351 struct net_device *loopback_dev =
352 dev_net(dev)->loopback_dev;
353
354 if (dev != loopback_dev) {
355 if (idev && idev->dev == dev) {
356 struct inet6_dev *loopback_idev =
357 in6_dev_get(loopback_dev);
358 if (loopback_idev) {
359 rt->rt6i_idev = loopback_idev;
360 in6_dev_put(idev);
361 }
362 }
363 if (rt->n && rt->n->dev == dev) {
364 rt->n->dev = loopback_dev;
365 dev_hold(loopback_dev);
366 dev_put(dev);
367 }
368 }
369}
370
371static bool rt6_check_expired(const struct rt6_info *rt)
372{
373 if (rt->rt6i_flags & RTF_EXPIRES) {
374 if (time_after(jiffies, rt->dst.expires))
375 return true;
376 } else if (rt->dst.from) {
377 return rt6_check_expired((struct rt6_info *) rt->dst.from);
378 }
379 return false;
380}
381
382static bool rt6_need_strict(const struct in6_addr *daddr)
383{
384 return ipv6_addr_type(daddr) &
385 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
386}
387
388
389
390
391
392static inline struct rt6_info *rt6_device_match(struct net *net,
393 struct rt6_info *rt,
394 const struct in6_addr *saddr,
395 int oif,
396 int flags)
397{
398 struct rt6_info *local = NULL;
399 struct rt6_info *sprt;
400
401 if (!oif && ipv6_addr_any(saddr))
402 goto out;
403
404 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
405 struct net_device *dev = sprt->dst.dev;
406
407 if (oif) {
408 if (dev->ifindex == oif)
409 return sprt;
410 if (dev->flags & IFF_LOOPBACK) {
411 if (!sprt->rt6i_idev ||
412 sprt->rt6i_idev->dev->ifindex != oif) {
413 if (flags & RT6_LOOKUP_F_IFACE && oif)
414 continue;
415 if (local && (!oif ||
416 local->rt6i_idev->dev->ifindex == oif))
417 continue;
418 }
419 local = sprt;
420 }
421 } else {
422 if (ipv6_chk_addr(net, saddr, dev,
423 flags & RT6_LOOKUP_F_IFACE))
424 return sprt;
425 }
426 }
427
428 if (oif) {
429 if (local)
430 return local;
431
432 if (flags & RT6_LOOKUP_F_IFACE)
433 return net->ipv6.ip6_null_entry;
434 }
435out:
436 return rt;
437}
438
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation towards the neighbour bound to @rt if that
 * neighbour is not in a NUD_VALID state.
 *
 * Probing is rate limited: a solicitation goes out only when more than
 * rtr_probe_interval (from the route's inet6_dev configuration) has passed
 * since neigh->updated, and neigh->updated is then bumped to jiffies.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * Locking: neigh->updated is modified here, so neigh->lock is taken for
 * writing.  Holding only the read side would let several CPUs pass the
 * time check together and store to neigh->updated concurrently.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	neigh = rt ? rt->n : NULL;

	/* unlocked fast path; re-checked under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* solicit via the solicited-node multicast address derived
		 * from the neighbour's key; done outside the lock
		 */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
475
476
477
478
479static inline int rt6_check_dev(struct rt6_info *rt, int oif)
480{
481 struct net_device *dev = rt->dst.dev;
482 if (!oif || dev->ifindex == oif)
483 return 2;
484 if ((dev->flags & IFF_LOOPBACK) &&
485 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
486 return 1;
487 return 0;
488}
489
490static inline int rt6_check_neigh(struct rt6_info *rt)
491{
492 struct neighbour *neigh;
493 int m;
494
495 neigh = rt->n;
496 if (rt->rt6i_flags & RTF_NONEXTHOP ||
497 !(rt->rt6i_flags & RTF_GATEWAY))
498 m = 1;
499 else if (neigh) {
500 read_lock_bh(&neigh->lock);
501 if (neigh->nud_state & NUD_VALID)
502 m = 2;
503#ifdef CONFIG_IPV6_ROUTER_PREF
504 else if (neigh->nud_state & NUD_FAILED)
505 m = 0;
506#endif
507 else
508 m = 1;
509 read_unlock_bh(&neigh->lock);
510 } else
511 m = 0;
512 return m;
513}
514
515static int rt6_score_route(struct rt6_info *rt, int oif,
516 int strict)
517{
518 int m, n;
519
520 m = rt6_check_dev(rt, oif);
521 if (!m && (strict & RT6_LOOKUP_F_IFACE))
522 return -1;
523#ifdef CONFIG_IPV6_ROUTER_PREF
524 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
525#endif
526 n = rt6_check_neigh(rt);
527 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
528 return -1;
529 return m;
530}
531
532static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
533 int *mpri, struct rt6_info *match)
534{
535 int m;
536
537 if (rt6_check_expired(rt))
538 goto out;
539
540 m = rt6_score_route(rt, oif, strict);
541 if (m < 0)
542 goto out;
543
544 if (m > *mpri) {
545 if (strict & RT6_LOOKUP_F_REACHABLE)
546 rt6_probe(match);
547 *mpri = m;
548 match = rt;
549 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
550 rt6_probe(rt);
551 }
552
553out:
554 return match;
555}
556
557static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
558 struct rt6_info *rr_head,
559 u32 metric, int oif, int strict)
560{
561 struct rt6_info *rt, *match;
562 int mpri = -1;
563
564 match = NULL;
565 for (rt = rr_head; rt && rt->rt6i_metric == metric;
566 rt = rt->dst.rt6_next)
567 match = find_match(rt, oif, strict, &mpri, match);
568 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
569 rt = rt->dst.rt6_next)
570 match = find_match(rt, oif, strict, &mpri, match);
571
572 return match;
573}
574
575static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
576{
577 struct rt6_info *match, *rt0;
578 struct net *net;
579
580 rt0 = fn->rr_ptr;
581 if (!rt0)
582 fn->rr_ptr = rt0 = fn->leaf;
583
584 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
585
586 if (!match &&
587 (strict & RT6_LOOKUP_F_REACHABLE)) {
588 struct rt6_info *next = rt0->dst.rt6_next;
589
590
591 if (!next || next->rt6i_metric != rt0->rt6i_metric)
592 next = fn->leaf;
593
594 if (next != rt0)
595 fn->rr_ptr = next;
596 }
597
598 net = dev_net(rt0->dst.dev);
599 return match ? match : net->ipv6.ip6_null_entry;
600}
601
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address to use as the gateway of the route
 *
 * After validating the option, the matching route is looked up.  A zero
 * lifetime deletes an existing route; a non-zero lifetime creates the route
 * if it is missing or refreshes the preference bits of an existing one.
 * The route's expiry is then set from the lifetime (or cleared when the
 * lifetime is not finite).
 *
 * Returns 0 on success and -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked_prefix, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* the option length must be consistent with the prefix length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* shorter option: build the prefix in a local buffer */
		ipv6_addr_prefix(&masked_prefix,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked_prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}

	return 0;
}
#endif
674
/*
 * BACKTRACK(net, saddr) - climb the fib tree after a failed selection.
 *
 * Must be expanded inside a function that has local variables "rt" and
 * "fn" and labels "out" and "restart".  When rt is the null entry, the
 * macro walks from fn towards the root (descending into a parent's
 * subtree via fib6_lookup() where one exists and differs from fn) and
 * jumps to "restart" at the first node flagged RTN_RTINFO, or to "out"
 * once the top-level root is reached.  If rt is not the null entry, the
 * macro does nothing.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) &&				\
			    FIB6_SUBTREE(pn) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
692
693static struct rt6_info *ip6_pol_route_lookup(struct net *net,
694 struct fib6_table *table,
695 struct flowi6 *fl6, int flags)
696{
697 struct fib6_node *fn;
698 struct rt6_info *rt;
699
700 read_lock_bh(&table->tb6_lock);
701 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
702restart:
703 rt = fn->leaf;
704 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
705 BACKTRACK(net, &fl6->saddr);
706out:
707 dst_use(&rt->dst, jiffies);
708 read_unlock_bh(&table->tb6_lock);
709 return rt;
710
711}
712
713struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
714 int flags)
715{
716 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
717}
718EXPORT_SYMBOL_GPL(ip6_route_lookup);
719
720struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
721 const struct in6_addr *saddr, int oif, int strict)
722{
723 struct flowi6 fl6 = {
724 .flowi6_oif = oif,
725 .daddr = *daddr,
726 };
727 struct dst_entry *dst;
728 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
729
730 if (saddr) {
731 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
732 flags |= RT6_LOOKUP_F_HAS_SADDR;
733 }
734
735 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
736 if (dst->error == 0)
737 return (struct rt6_info *) dst;
738
739 dst_release(dst);
740
741 return NULL;
742}
743
744EXPORT_SYMBOL(rt6_lookup);
745
746
747
748
749
750
751
752static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
753{
754 int err;
755 struct fib6_table *table;
756
757 table = rt->rt6i_table;
758 write_lock_bh(&table->tb6_lock);
759 err = fib6_add(&table->tb6_root, rt, info);
760 write_unlock_bh(&table->tb6_lock);
761
762 return err;
763}
764
765int ip6_ins_rt(struct rt6_info *rt)
766{
767 struct nl_info info = {
768 .nl_net = dev_net(rt->dst.dev),
769 };
770 return __ip6_ins_rt(rt, &info);
771}
772
773static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
774 const struct in6_addr *daddr,
775 const struct in6_addr *saddr)
776{
777 struct rt6_info *rt;
778
779
780
781
782
783 rt = ip6_rt_copy(ort, daddr);
784
785 if (rt) {
786 int attempts = !in_softirq();
787
788 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
789 if (ort->rt6i_dst.plen != 128 &&
790 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
791 rt->rt6i_flags |= RTF_ANYCAST;
792 rt->rt6i_gateway = *daddr;
793 }
794
795 rt->rt6i_flags |= RTF_CACHE;
796
797#ifdef CONFIG_IPV6_SUBTREES
798 if (rt->rt6i_src.plen && saddr) {
799 rt->rt6i_src.addr = *saddr;
800 rt->rt6i_src.plen = 128;
801 }
802#endif
803
804 retry:
805 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
806 struct net *net = dev_net(rt->dst.dev);
807 int saved_rt_min_interval =
808 net->ipv6.sysctl.ip6_rt_gc_min_interval;
809 int saved_rt_elasticity =
810 net->ipv6.sysctl.ip6_rt_gc_elasticity;
811
812 if (attempts-- > 0) {
813 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
814 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
815
816 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
817
818 net->ipv6.sysctl.ip6_rt_gc_elasticity =
819 saved_rt_elasticity;
820 net->ipv6.sysctl.ip6_rt_gc_min_interval =
821 saved_rt_min_interval;
822 goto retry;
823 }
824
825 net_warn_ratelimited("Neighbour table overflow\n");
826 dst_free(&rt->dst);
827 return NULL;
828 }
829 }
830
831 return rt;
832}
833
834static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
835 const struct in6_addr *daddr)
836{
837 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
838
839 if (rt) {
840 rt->rt6i_flags |= RTF_CACHE;
841 rt->n = neigh_clone(ort->n);
842 }
843 return rt;
844}
845
846static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
847 struct flowi6 *fl6, int flags)
848{
849 struct fib6_node *fn;
850 struct rt6_info *rt, *nrt;
851 int strict = 0;
852 int attempts = 3;
853 int err;
854 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
855
856 strict |= flags & RT6_LOOKUP_F_IFACE;
857
858relookup:
859 read_lock_bh(&table->tb6_lock);
860
861restart_2:
862 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
863
864restart:
865 rt = rt6_select(fn, oif, strict | reachable);
866
867 BACKTRACK(net, &fl6->saddr);
868 if (rt == net->ipv6.ip6_null_entry ||
869 rt->rt6i_flags & RTF_CACHE)
870 goto out;
871
872 dst_hold(&rt->dst);
873 read_unlock_bh(&table->tb6_lock);
874
875 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
876 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
877 else if (!(rt->dst.flags & DST_HOST))
878 nrt = rt6_alloc_clone(rt, &fl6->daddr);
879 else
880 goto out2;
881
882 dst_release(&rt->dst);
883 rt = nrt ? : net->ipv6.ip6_null_entry;
884
885 dst_hold(&rt->dst);
886 if (nrt) {
887 err = ip6_ins_rt(nrt);
888 if (!err)
889 goto out2;
890 }
891
892 if (--attempts <= 0)
893 goto out2;
894
895
896
897
898
899 dst_release(&rt->dst);
900 goto relookup;
901
902out:
903 if (reachable) {
904 reachable = 0;
905 goto restart_2;
906 }
907 dst_hold(&rt->dst);
908 read_unlock_bh(&table->tb6_lock);
909out2:
910 rt->dst.lastuse = jiffies;
911 rt->dst.__use++;
912
913 return rt;
914}
915
916static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
917 struct flowi6 *fl6, int flags)
918{
919 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
920}
921
922static struct dst_entry *ip6_route_input_lookup(struct net *net,
923 struct net_device *dev,
924 struct flowi6 *fl6, int flags)
925{
926 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
927 flags |= RT6_LOOKUP_F_IFACE;
928
929 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
930}
931
932void ip6_route_input(struct sk_buff *skb)
933{
934 const struct ipv6hdr *iph = ipv6_hdr(skb);
935 struct net *net = dev_net(skb->dev);
936 int flags = RT6_LOOKUP_F_HAS_SADDR;
937 struct flowi6 fl6 = {
938 .flowi6_iif = skb->dev->ifindex,
939 .daddr = iph->daddr,
940 .saddr = iph->saddr,
941 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
942 .flowi6_mark = skb->mark,
943 .flowi6_proto = iph->nexthdr,
944 };
945
946 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
947}
948
949static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
950 struct flowi6 *fl6, int flags)
951{
952 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
953}
954
955struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
956 struct flowi6 *fl6)
957{
958 int flags = 0;
959
960 fl6->flowi6_iif = LOOPBACK_IFINDEX;
961
962 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
963 flags |= RT6_LOOKUP_F_IFACE;
964
965 if (!ipv6_addr_any(&fl6->saddr))
966 flags |= RT6_LOOKUP_F_HAS_SADDR;
967 else if (sk)
968 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
969
970 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
971}
972
973EXPORT_SYMBOL(ip6_route_output);
974
975struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
976{
977 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
978 struct dst_entry *new = NULL;
979
980 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
981 if (rt) {
982 new = &rt->dst;
983
984 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
985 rt6_init_peer(rt, net->ipv6.peers);
986
987 new->__use = 1;
988 new->input = dst_discard;
989 new->output = dst_discard;
990
991 if (dst_metrics_read_only(&ort->dst))
992 new->_metrics = ort->dst._metrics;
993 else
994 dst_copy_metrics(new, &ort->dst);
995 rt->rt6i_idev = ort->rt6i_idev;
996 if (rt->rt6i_idev)
997 in6_dev_hold(rt->rt6i_idev);
998
999 rt->rt6i_gateway = ort->rt6i_gateway;
1000 rt->rt6i_flags = ort->rt6i_flags;
1001 rt6_clean_expires(rt);
1002 rt->rt6i_metric = 0;
1003
1004 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005#ifdef CONFIG_IPV6_SUBTREES
1006 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007#endif
1008
1009 dst_free(new);
1010 }
1011
1012 dst_release(dst_orig);
1013 return new ? new : ERR_PTR(-ENOMEM);
1014}
1015
1016
1017
1018
1019
1020static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021{
1022 struct rt6_info *rt;
1023
1024 rt = (struct rt6_info *) dst;
1025
1026
1027
1028
1029
1030 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031 return NULL;
1032
1033 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035 if (!rt6_has_peer(rt))
1036 rt6_bind_peer(rt, 0);
1037 rt->rt6i_peer_genid = rt6_peer_genid();
1038 }
1039 return dst;
1040 }
1041 return NULL;
1042}
1043
1044static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045{
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047
1048 if (rt) {
1049 if (rt->rt6i_flags & RTF_CACHE) {
1050 if (rt6_check_expired(rt)) {
1051 ip6_del_rt(rt);
1052 dst = NULL;
1053 }
1054 } else {
1055 dst_release(dst);
1056 dst = NULL;
1057 }
1058 }
1059 return dst;
1060}
1061
1062static void ip6_link_failure(struct sk_buff *skb)
1063{
1064 struct rt6_info *rt;
1065
1066 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1067
1068 rt = (struct rt6_info *) skb_dst(skb);
1069 if (rt) {
1070 if (rt->rt6i_flags & RTF_CACHE)
1071 rt6_update_expires(rt, 0);
1072 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1073 rt->rt6i_node->fn_sernum = -1;
1074 }
1075}
1076
1077static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078 struct sk_buff *skb, u32 mtu)
1079{
1080 struct rt6_info *rt6 = (struct rt6_info*)dst;
1081
1082 dst_confirm(dst);
1083 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1084 struct net *net = dev_net(dst->dev);
1085
1086 rt6->rt6i_flags |= RTF_MODIFIED;
1087 if (mtu < IPV6_MIN_MTU) {
1088 u32 features = dst_metric(dst, RTAX_FEATURES);
1089 mtu = IPV6_MIN_MTU;
1090 features |= RTAX_FEATURE_ALLFRAG;
1091 dst_metric_set(dst, RTAX_FEATURES, features);
1092 }
1093 dst_metric_set(dst, RTAX_MTU, mtu);
1094 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1095 }
1096}
1097
1098void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099 int oif, u32 mark)
1100{
1101 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102 struct dst_entry *dst;
1103 struct flowi6 fl6;
1104
1105 memset(&fl6, 0, sizeof(fl6));
1106 fl6.flowi6_oif = oif;
1107 fl6.flowi6_mark = mark;
1108 fl6.flowi6_flags = 0;
1109 fl6.daddr = iph->daddr;
1110 fl6.saddr = iph->saddr;
1111 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112
1113 dst = ip6_route_output(net, NULL, &fl6);
1114 if (!dst->error)
1115 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1116 dst_release(dst);
1117}
1118EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119
1120void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121{
1122 ip6_update_pmtu(skb, sock_net(sk), mtu,
1123 sk->sk_bound_dev_if, sk->sk_mark);
1124}
1125EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1126
1127void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128{
1129 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130 struct dst_entry *dst;
1131 struct flowi6 fl6;
1132
1133 memset(&fl6, 0, sizeof(fl6));
1134 fl6.flowi6_oif = oif;
1135 fl6.flowi6_mark = mark;
1136 fl6.flowi6_flags = 0;
1137 fl6.daddr = iph->daddr;
1138 fl6.saddr = iph->saddr;
1139 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140
1141 dst = ip6_route_output(net, NULL, &fl6);
1142 if (!dst->error)
1143 rt6_do_redirect(dst, NULL, skb);
1144 dst_release(dst);
1145}
1146EXPORT_SYMBOL_GPL(ip6_redirect);
1147
1148void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149{
1150 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151}
1152EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153
1154static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1155{
1156 struct net_device *dev = dst->dev;
1157 unsigned int mtu = dst_mtu(dst);
1158 struct net *net = dev_net(dev);
1159
1160 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161
1162 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1164
1165
1166
1167
1168
1169
1170
1171 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172 mtu = IPV6_MAXPLEN;
1173 return mtu;
1174}
1175
1176static unsigned int ip6_mtu(const struct dst_entry *dst)
1177{
1178 struct inet6_dev *idev;
1179 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180
1181 if (mtu)
1182 return mtu;
1183
1184 mtu = IPV6_MIN_MTU;
1185
1186 rcu_read_lock();
1187 idev = __in6_dev_get(dst->dev);
1188 if (idev)
1189 mtu = idev->cnf.mtu6;
1190 rcu_read_unlock();
1191
1192 return mtu;
1193}
1194
1195static struct dst_entry *icmp6_dst_gc_list;
1196static DEFINE_SPINLOCK(icmp6_dst_lock);
1197
1198struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1199 struct neighbour *neigh,
1200 struct flowi6 *fl6)
1201{
1202 struct dst_entry *dst;
1203 struct rt6_info *rt;
1204 struct inet6_dev *idev = in6_dev_get(dev);
1205 struct net *net = dev_net(dev);
1206
1207 if (unlikely(!idev))
1208 return ERR_PTR(-ENODEV);
1209
1210 rt = ip6_dst_alloc(net, dev, 0, NULL);
1211 if (unlikely(!rt)) {
1212 in6_dev_put(idev);
1213 dst = ERR_PTR(-ENOMEM);
1214 goto out;
1215 }
1216
1217 if (neigh)
1218 neigh_hold(neigh);
1219 else {
1220 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1221 if (IS_ERR(neigh)) {
1222 in6_dev_put(idev);
1223 dst_free(&rt->dst);
1224 return ERR_CAST(neigh);
1225 }
1226 }
1227
1228 rt->dst.flags |= DST_HOST;
1229 rt->dst.output = ip6_output;
1230 rt->n = neigh;
1231 atomic_set(&rt->dst.__refcnt, 1);
1232 rt->rt6i_dst.addr = fl6->daddr;
1233 rt->rt6i_dst.plen = 128;
1234 rt->rt6i_idev = idev;
1235 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1236
1237 spin_lock_bh(&icmp6_dst_lock);
1238 rt->dst.next = icmp6_dst_gc_list;
1239 icmp6_dst_gc_list = &rt->dst;
1240 spin_unlock_bh(&icmp6_dst_lock);
1241
1242 fib6_force_start_gc(net);
1243
1244 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245
1246out:
1247 return dst;
1248}
1249
1250int icmp6_dst_gc(void)
1251{
1252 struct dst_entry *dst, **pprev;
1253 int more = 0;
1254
1255 spin_lock_bh(&icmp6_dst_lock);
1256 pprev = &icmp6_dst_gc_list;
1257
1258 while ((dst = *pprev) != NULL) {
1259 if (!atomic_read(&dst->__refcnt)) {
1260 *pprev = dst->next;
1261 dst_free(dst);
1262 } else {
1263 pprev = &dst->next;
1264 ++more;
1265 }
1266 }
1267
1268 spin_unlock_bh(&icmp6_dst_lock);
1269
1270 return more;
1271}
1272
1273static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274 void *arg)
1275{
1276 struct dst_entry *dst, **pprev;
1277
1278 spin_lock_bh(&icmp6_dst_lock);
1279 pprev = &icmp6_dst_gc_list;
1280 while ((dst = *pprev) != NULL) {
1281 struct rt6_info *rt = (struct rt6_info *) dst;
1282 if (func(rt, arg)) {
1283 *pprev = dst->next;
1284 dst_free(dst);
1285 } else {
1286 pprev = &dst->next;
1287 }
1288 }
1289 spin_unlock_bh(&icmp6_dst_lock);
1290}
1291
1292static int ip6_dst_gc(struct dst_ops *ops)
1293{
1294 unsigned long now = jiffies;
1295 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1296 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1301 int entries;
1302
1303 entries = dst_entries_get_fast(ops);
1304 if (time_after(rt_last_gc + rt_min_interval, now) &&
1305 entries <= rt_max_size)
1306 goto out;
1307
1308 net->ipv6.ip6_rt_gc_expire++;
1309 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310 net->ipv6.ip6_rt_last_gc = now;
1311 entries = dst_entries_get_slow(ops);
1312 if (entries < ops->gc_thresh)
1313 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1314out:
1315 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1316 return entries > rt_max_size;
1317}
1318
1319
1320
1321
1322
1323
1324
1325int ip6_dst_hoplimit(struct dst_entry *dst)
1326{
1327 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1328 if (hoplimit == 0) {
1329 struct net_device *dev = dst->dev;
1330 struct inet6_dev *idev;
1331
1332 rcu_read_lock();
1333 idev = __in6_dev_get(dev);
1334 if (idev)
1335 hoplimit = idev->cnf.hop_limit;
1336 else
1337 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1338 rcu_read_unlock();
1339 }
1340 return hoplimit;
1341}
1342EXPORT_SYMBOL(ip6_dst_hoplimit);
1343
1344
1345
1346
1347
1348int ip6_route_add(struct fib6_config *cfg)
1349{
1350 int err;
1351 struct net *net = cfg->fc_nlinfo.nl_net;
1352 struct rt6_info *rt = NULL;
1353 struct net_device *dev = NULL;
1354 struct inet6_dev *idev = NULL;
1355 struct fib6_table *table;
1356 int addr_type;
1357
1358 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1359 return -EINVAL;
1360#ifndef CONFIG_IPV6_SUBTREES
1361 if (cfg->fc_src_len)
1362 return -EINVAL;
1363#endif
1364 if (cfg->fc_ifindex) {
1365 err = -ENODEV;
1366 dev = dev_get_by_index(net, cfg->fc_ifindex);
1367 if (!dev)
1368 goto out;
1369 idev = in6_dev_get(dev);
1370 if (!idev)
1371 goto out;
1372 }
1373
1374 if (cfg->fc_metric == 0)
1375 cfg->fc_metric = IP6_RT_PRIO_USER;
1376
1377 err = -ENOBUFS;
1378 if (cfg->fc_nlinfo.nlh &&
1379 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1380 table = fib6_get_table(net, cfg->fc_table);
1381 if (!table) {
1382 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1383 table = fib6_new_table(net, cfg->fc_table);
1384 }
1385 } else {
1386 table = fib6_new_table(net, cfg->fc_table);
1387 }
1388
1389 if (!table)
1390 goto out;
1391
1392 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1393
1394 if (!rt) {
1395 err = -ENOMEM;
1396 goto out;
1397 }
1398
1399 if (cfg->fc_flags & RTF_EXPIRES)
1400 rt6_set_expires(rt, jiffies +
1401 clock_t_to_jiffies(cfg->fc_expires));
1402 else
1403 rt6_clean_expires(rt);
1404
1405 if (cfg->fc_protocol == RTPROT_UNSPEC)
1406 cfg->fc_protocol = RTPROT_BOOT;
1407 rt->rt6i_protocol = cfg->fc_protocol;
1408
1409 addr_type = ipv6_addr_type(&cfg->fc_dst);
1410
1411 if (addr_type & IPV6_ADDR_MULTICAST)
1412 rt->dst.input = ip6_mc_input;
1413 else if (cfg->fc_flags & RTF_LOCAL)
1414 rt->dst.input = ip6_input;
1415 else
1416 rt->dst.input = ip6_forward;
1417
1418 rt->dst.output = ip6_output;
1419
1420 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421 rt->rt6i_dst.plen = cfg->fc_dst_len;
1422 if (rt->rt6i_dst.plen == 128)
1423 rt->dst.flags |= DST_HOST;
1424
1425 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427 if (!metrics) {
1428 err = -ENOMEM;
1429 goto out;
1430 }
1431 dst_init_metrics(&rt->dst, metrics, 0);
1432 }
1433#ifdef CONFIG_IPV6_SUBTREES
1434 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435 rt->rt6i_src.plen = cfg->fc_src_len;
1436#endif
1437
1438 rt->rt6i_metric = cfg->fc_metric;
1439
1440
1441
1442
1443 if ((cfg->fc_flags & RTF_REJECT) ||
1444 (dev && (dev->flags & IFF_LOOPBACK) &&
1445 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446 !(cfg->fc_flags & RTF_LOCAL))) {
1447
1448 if (dev != net->loopback_dev) {
1449 if (dev) {
1450 dev_put(dev);
1451 in6_dev_put(idev);
1452 }
1453 dev = net->loopback_dev;
1454 dev_hold(dev);
1455 idev = in6_dev_get(dev);
1456 if (!idev) {
1457 err = -ENODEV;
1458 goto out;
1459 }
1460 }
1461 rt->dst.output = ip6_pkt_discard_out;
1462 rt->dst.input = ip6_pkt_discard;
1463 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464 switch (cfg->fc_type) {
1465 case RTN_BLACKHOLE:
1466 rt->dst.error = -EINVAL;
1467 break;
1468 case RTN_PROHIBIT:
1469 rt->dst.error = -EACCES;
1470 break;
1471 case RTN_THROW:
1472 rt->dst.error = -EAGAIN;
1473 break;
1474 default:
1475 rt->dst.error = -ENETUNREACH;
1476 break;
1477 }
1478 goto install_route;
1479 }
1480
1481 if (cfg->fc_flags & RTF_GATEWAY) {
1482 const struct in6_addr *gw_addr;
1483 int gwa_type;
1484
1485 gw_addr = &cfg->fc_gateway;
1486 rt->rt6i_gateway = *gw_addr;
1487 gwa_type = ipv6_addr_type(gw_addr);
1488
1489 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490 struct rt6_info *grt;
1491
1492
1493
1494
1495
1496
1497
1498
1499 err = -EINVAL;
1500 if (!(gwa_type & IPV6_ADDR_UNICAST))
1501 goto out;
1502
1503 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1504
1505 err = -EHOSTUNREACH;
1506 if (!grt)
1507 goto out;
1508 if (dev) {
1509 if (dev != grt->dst.dev) {
1510 dst_release(&grt->dst);
1511 goto out;
1512 }
1513 } else {
1514 dev = grt->dst.dev;
1515 idev = grt->rt6i_idev;
1516 dev_hold(dev);
1517 in6_dev_hold(grt->rt6i_idev);
1518 }
1519 if (!(grt->rt6i_flags & RTF_GATEWAY))
1520 err = 0;
1521 dst_release(&grt->dst);
1522
1523 if (err)
1524 goto out;
1525 }
1526 err = -EINVAL;
1527 if (!dev || (dev->flags & IFF_LOOPBACK))
1528 goto out;
1529 }
1530
1531 err = -ENODEV;
1532 if (!dev)
1533 goto out;
1534
1535 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537 err = -EINVAL;
1538 goto out;
1539 }
1540 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1541 rt->rt6i_prefsrc.plen = 128;
1542 } else
1543 rt->rt6i_prefsrc.plen = 0;
1544
1545 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1546 err = rt6_bind_neighbour(rt, dev);
1547 if (err)
1548 goto out;
1549 }
1550
1551 rt->rt6i_flags = cfg->fc_flags;
1552
1553install_route:
1554 if (cfg->fc_mx) {
1555 struct nlattr *nla;
1556 int remaining;
1557
1558 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1559 int type = nla_type(nla);
1560
1561 if (type) {
1562 if (type > RTAX_MAX) {
1563 err = -EINVAL;
1564 goto out;
1565 }
1566
1567 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1568 }
1569 }
1570 }
1571
1572 rt->dst.dev = dev;
1573 rt->rt6i_idev = idev;
1574 rt->rt6i_table = table;
1575
1576 cfg->fc_nlinfo.nl_net = dev_net(dev);
1577
1578 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1579
1580out:
1581 if (dev)
1582 dev_put(dev);
1583 if (idev)
1584 in6_dev_put(idev);
1585 if (rt)
1586 dst_free(&rt->dst);
1587 return err;
1588}
1589
1590static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1591{
1592 int err;
1593 struct fib6_table *table;
1594 struct net *net = dev_net(rt->dst.dev);
1595
1596 if (rt == net->ipv6.ip6_null_entry) {
1597 err = -ENOENT;
1598 goto out;
1599 }
1600
1601 table = rt->rt6i_table;
1602 write_lock_bh(&table->tb6_lock);
1603 err = fib6_del(rt, info);
1604 write_unlock_bh(&table->tb6_lock);
1605
1606out:
1607 dst_release(&rt->dst);
1608 return err;
1609}
1610
1611int ip6_del_rt(struct rt6_info *rt)
1612{
1613 struct nl_info info = {
1614 .nl_net = dev_net(rt->dst.dev),
1615 };
1616 return __ip6_del_rt(rt, &info);
1617}
1618
1619static int ip6_route_del(struct fib6_config *cfg)
1620{
1621 struct fib6_table *table;
1622 struct fib6_node *fn;
1623 struct rt6_info *rt;
1624 int err = -ESRCH;
1625
1626 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1627 if (!table)
1628 return err;
1629
1630 read_lock_bh(&table->tb6_lock);
1631
1632 fn = fib6_locate(&table->tb6_root,
1633 &cfg->fc_dst, cfg->fc_dst_len,
1634 &cfg->fc_src, cfg->fc_src_len);
1635
1636 if (fn) {
1637 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1638 if (cfg->fc_ifindex &&
1639 (!rt->dst.dev ||
1640 rt->dst.dev->ifindex != cfg->fc_ifindex))
1641 continue;
1642 if (cfg->fc_flags & RTF_GATEWAY &&
1643 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1644 continue;
1645 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1646 continue;
1647 dst_hold(&rt->dst);
1648 read_unlock_bh(&table->tb6_lock);
1649
1650 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1651 }
1652 }
1653 read_unlock_bh(&table->tb6_lock);
1654
1655 return err;
1656}
1657
1658static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1659{
1660 struct net *net = dev_net(skb->dev);
1661 struct netevent_redirect netevent;
1662 struct rt6_info *rt, *nrt = NULL;
1663 const struct in6_addr *target;
1664 struct ndisc_options ndopts;
1665 const struct in6_addr *dest;
1666 struct neighbour *old_neigh;
1667 struct inet6_dev *in6_dev;
1668 struct neighbour *neigh;
1669 struct icmp6hdr *icmph;
1670 int optlen, on_link;
1671 u8 *lladdr;
1672
1673 optlen = skb->tail - skb->transport_header;
1674 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1675
1676 if (optlen < 0) {
1677 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1678 return;
1679 }
1680
1681 icmph = icmp6_hdr(skb);
1682 target = (const struct in6_addr *) (icmph + 1);
1683 dest = target + 1;
1684
1685 if (ipv6_addr_is_multicast(dest)) {
1686 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1687 return;
1688 }
1689
1690 on_link = 0;
1691 if (ipv6_addr_equal(dest, target)) {
1692 on_link = 1;
1693 } else if (ipv6_addr_type(target) !=
1694 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1695 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1696 return;
1697 }
1698
1699 in6_dev = __in6_dev_get(skb->dev);
1700 if (!in6_dev)
1701 return;
1702 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1703 return;
1704
1705
1706
1707
1708
1709
1710 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1711 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1712 return;
1713 }
1714
1715 lladdr = NULL;
1716 if (ndopts.nd_opts_tgt_lladdr) {
1717 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1718 skb->dev);
1719 if (!lladdr) {
1720 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1721 return;
1722 }
1723 }
1724
1725 rt = (struct rt6_info *) dst;
1726 if (rt == net->ipv6.ip6_null_entry) {
1727 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1728 return;
1729 }
1730
1731
1732
1733
1734
1735 dst_confirm(&rt->dst);
1736
1737 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1738 if (!neigh)
1739 return;
1740
1741
1742 old_neigh = rt->n;
1743 if (neigh == old_neigh)
1744 goto out;
1745
1746
1747
1748
1749
1750 neigh_update(neigh, lladdr, NUD_STALE,
1751 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1752 NEIGH_UPDATE_F_OVERRIDE|
1753 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1754 NEIGH_UPDATE_F_ISROUTER))
1755 );
1756
1757 nrt = ip6_rt_copy(rt, dest);
1758 if (!nrt)
1759 goto out;
1760
1761 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1762 if (on_link)
1763 nrt->rt6i_flags &= ~RTF_GATEWAY;
1764
1765 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1766 nrt->n = neigh_clone(neigh);
1767
1768 if (ip6_ins_rt(nrt))
1769 goto out;
1770
1771 netevent.old = &rt->dst;
1772 netevent.old_neigh = old_neigh;
1773 netevent.new = &nrt->dst;
1774 netevent.new_neigh = neigh;
1775 netevent.daddr = dest;
1776 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1777
1778 if (rt->rt6i_flags & RTF_CACHE) {
1779 rt = (struct rt6_info *) dst_clone(&rt->dst);
1780 ip6_del_rt(rt);
1781 }
1782
1783out:
1784 neigh_release(neigh);
1785}
1786
1787
1788
1789
1790
1791static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1792 const struct in6_addr *dest)
1793{
1794 struct net *net = dev_net(ort->dst.dev);
1795 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1796 ort->rt6i_table);
1797
1798 if (rt) {
1799 rt->dst.input = ort->dst.input;
1800 rt->dst.output = ort->dst.output;
1801 rt->dst.flags |= DST_HOST;
1802
1803 rt->rt6i_dst.addr = *dest;
1804 rt->rt6i_dst.plen = 128;
1805 dst_copy_metrics(&rt->dst, &ort->dst);
1806 rt->dst.error = ort->dst.error;
1807 rt->rt6i_idev = ort->rt6i_idev;
1808 if (rt->rt6i_idev)
1809 in6_dev_hold(rt->rt6i_idev);
1810 rt->dst.lastuse = jiffies;
1811
1812 rt->rt6i_gateway = ort->rt6i_gateway;
1813 rt->rt6i_flags = ort->rt6i_flags;
1814 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1815 (RTF_DEFAULT | RTF_ADDRCONF))
1816 rt6_set_from(rt, ort);
1817 else
1818 rt6_clean_expires(rt);
1819 rt->rt6i_metric = 0;
1820
1821#ifdef CONFIG_IPV6_SUBTREES
1822 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823#endif
1824 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1825 rt->rt6i_table = ort->rt6i_table;
1826 }
1827 return rt;
1828}
1829
1830#ifdef CONFIG_IPV6_ROUTE_INFO
1831static struct rt6_info *rt6_get_route_info(struct net *net,
1832 const struct in6_addr *prefix, int prefixlen,
1833 const struct in6_addr *gwaddr, int ifindex)
1834{
1835 struct fib6_node *fn;
1836 struct rt6_info *rt = NULL;
1837 struct fib6_table *table;
1838
1839 table = fib6_get_table(net, RT6_TABLE_INFO);
1840 if (!table)
1841 return NULL;
1842
1843 read_lock_bh(&table->tb6_lock);
1844 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1845 if (!fn)
1846 goto out;
1847
1848 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1849 if (rt->dst.dev->ifindex != ifindex)
1850 continue;
1851 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852 continue;
1853 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854 continue;
1855 dst_hold(&rt->dst);
1856 break;
1857 }
1858out:
1859 read_unlock_bh(&table->tb6_lock);
1860 return rt;
1861}
1862
1863static struct rt6_info *rt6_add_route_info(struct net *net,
1864 const struct in6_addr *prefix, int prefixlen,
1865 const struct in6_addr *gwaddr, int ifindex,
1866 unsigned int pref)
1867{
1868 struct fib6_config cfg = {
1869 .fc_table = RT6_TABLE_INFO,
1870 .fc_metric = IP6_RT_PRIO_USER,
1871 .fc_ifindex = ifindex,
1872 .fc_dst_len = prefixlen,
1873 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874 RTF_UP | RTF_PREF(pref),
1875 .fc_nlinfo.portid = 0,
1876 .fc_nlinfo.nlh = NULL,
1877 .fc_nlinfo.nl_net = net,
1878 };
1879
1880 cfg.fc_dst = *prefix;
1881 cfg.fc_gateway = *gwaddr;
1882
1883
1884 if (!prefixlen)
1885 cfg.fc_flags |= RTF_DEFAULT;
1886
1887 ip6_route_add(&cfg);
1888
1889 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1890}
1891#endif
1892
1893struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1894{
1895 struct rt6_info *rt;
1896 struct fib6_table *table;
1897
1898 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1899 if (!table)
1900 return NULL;
1901
1902 read_lock_bh(&table->tb6_lock);
1903 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1904 if (dev == rt->dst.dev &&
1905 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1906 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907 break;
1908 }
1909 if (rt)
1910 dst_hold(&rt->dst);
1911 read_unlock_bh(&table->tb6_lock);
1912 return rt;
1913}
1914
1915struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1916 struct net_device *dev,
1917 unsigned int pref)
1918{
1919 struct fib6_config cfg = {
1920 .fc_table = RT6_TABLE_DFLT,
1921 .fc_metric = IP6_RT_PRIO_USER,
1922 .fc_ifindex = dev->ifindex,
1923 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1925 .fc_nlinfo.portid = 0,
1926 .fc_nlinfo.nlh = NULL,
1927 .fc_nlinfo.nl_net = dev_net(dev),
1928 };
1929
1930 cfg.fc_gateway = *gwaddr;
1931
1932 ip6_route_add(&cfg);
1933
1934 return rt6_get_dflt_router(gwaddr, dev);
1935}
1936
1937void rt6_purge_dflt_routers(struct net *net)
1938{
1939 struct rt6_info *rt;
1940 struct fib6_table *table;
1941
1942
1943 table = fib6_get_table(net, RT6_TABLE_DFLT);
1944 if (!table)
1945 return;
1946
1947restart:
1948 read_lock_bh(&table->tb6_lock);
1949 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1950 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1951 dst_hold(&rt->dst);
1952 read_unlock_bh(&table->tb6_lock);
1953 ip6_del_rt(rt);
1954 goto restart;
1955 }
1956 }
1957 read_unlock_bh(&table->tb6_lock);
1958}
1959
1960static void rtmsg_to_fib6_config(struct net *net,
1961 struct in6_rtmsg *rtmsg,
1962 struct fib6_config *cfg)
1963{
1964 memset(cfg, 0, sizeof(*cfg));
1965
1966 cfg->fc_table = RT6_TABLE_MAIN;
1967 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968 cfg->fc_metric = rtmsg->rtmsg_metric;
1969 cfg->fc_expires = rtmsg->rtmsg_info;
1970 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972 cfg->fc_flags = rtmsg->rtmsg_flags;
1973
1974 cfg->fc_nlinfo.nl_net = net;
1975
1976 cfg->fc_dst = rtmsg->rtmsg_dst;
1977 cfg->fc_src = rtmsg->rtmsg_src;
1978 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1979}
1980
1981int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1982{
1983 struct fib6_config cfg;
1984 struct in6_rtmsg rtmsg;
1985 int err;
1986
1987 switch(cmd) {
1988 case SIOCADDRT:
1989 case SIOCDELRT:
1990 if (!capable(CAP_NET_ADMIN))
1991 return -EPERM;
1992 err = copy_from_user(&rtmsg, arg,
1993 sizeof(struct in6_rtmsg));
1994 if (err)
1995 return -EFAULT;
1996
1997 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1998
1999 rtnl_lock();
2000 switch (cmd) {
2001 case SIOCADDRT:
2002 err = ip6_route_add(&cfg);
2003 break;
2004 case SIOCDELRT:
2005 err = ip6_route_del(&cfg);
2006 break;
2007 default:
2008 err = -EINVAL;
2009 }
2010 rtnl_unlock();
2011
2012 return err;
2013 }
2014
2015 return -EINVAL;
2016}
2017
2018
2019
2020
2021
2022static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2023{
2024 int type;
2025 struct dst_entry *dst = skb_dst(skb);
2026 switch (ipstats_mib_noroutes) {
2027 case IPSTATS_MIB_INNOROUTES:
2028 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2029 if (type == IPV6_ADDR_ANY) {
2030 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031 IPSTATS_MIB_INADDRERRORS);
2032 break;
2033 }
2034
2035 case IPSTATS_MIB_OUTNOROUTES:
2036 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037 ipstats_mib_noroutes);
2038 break;
2039 }
2040 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2041 kfree_skb(skb);
2042 return 0;
2043}
2044
2045static int ip6_pkt_discard(struct sk_buff *skb)
2046{
2047 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2048}
2049
2050static int ip6_pkt_discard_out(struct sk_buff *skb)
2051{
2052 skb->dev = skb_dst(skb)->dev;
2053 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2054}
2055
2056#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057
2058static int ip6_pkt_prohibit(struct sk_buff *skb)
2059{
2060 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2061}
2062
2063static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064{
2065 skb->dev = skb_dst(skb)->dev;
2066 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2067}
2068
2069#endif
2070
2071
2072
2073
2074
2075struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076 const struct in6_addr *addr,
2077 bool anycast)
2078{
2079 struct net *net = dev_net(idev->dev);
2080 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2081 int err;
2082
2083 if (!rt) {
2084 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2085 return ERR_PTR(-ENOMEM);
2086 }
2087
2088 in6_dev_hold(idev);
2089
2090 rt->dst.flags |= DST_HOST;
2091 rt->dst.input = ip6_input;
2092 rt->dst.output = ip6_output;
2093 rt->rt6i_idev = idev;
2094
2095 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2096 if (anycast)
2097 rt->rt6i_flags |= RTF_ANYCAST;
2098 else
2099 rt->rt6i_flags |= RTF_LOCAL;
2100 err = rt6_bind_neighbour(rt, rt->dst.dev);
2101 if (err) {
2102 dst_free(&rt->dst);
2103 return ERR_PTR(err);
2104 }
2105
2106 rt->rt6i_dst.addr = *addr;
2107 rt->rt6i_dst.plen = 128;
2108 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2109
2110 atomic_set(&rt->dst.__refcnt, 1);
2111
2112 return rt;
2113}
2114
2115int ip6_route_get_saddr(struct net *net,
2116 struct rt6_info *rt,
2117 const struct in6_addr *daddr,
2118 unsigned int prefs,
2119 struct in6_addr *saddr)
2120{
2121 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2122 int err = 0;
2123 if (rt->rt6i_prefsrc.plen)
2124 *saddr = rt->rt6i_prefsrc.addr;
2125 else
2126 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2127 daddr, prefs, saddr);
2128 return err;
2129}
2130
2131
2132struct arg_dev_net_ip {
2133 struct net_device *dev;
2134 struct net *net;
2135 struct in6_addr *addr;
2136};
2137
2138static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2139{
2140 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2141 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2142 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2143
2144 if (((void *)rt->dst.dev == dev || !dev) &&
2145 rt != net->ipv6.ip6_null_entry &&
2146 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2147
2148 rt->rt6i_prefsrc.plen = 0;
2149 }
2150 return 0;
2151}
2152
2153void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2154{
2155 struct net *net = dev_net(ifp->idev->dev);
2156 struct arg_dev_net_ip adni = {
2157 .dev = ifp->idev->dev,
2158 .net = net,
2159 .addr = &ifp->addr,
2160 };
2161 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2162}
2163
2164struct arg_dev_net {
2165 struct net_device *dev;
2166 struct net *net;
2167};
2168
2169static int fib6_ifdown(struct rt6_info *rt, void *arg)
2170{
2171 const struct arg_dev_net *adn = arg;
2172 const struct net_device *dev = adn->dev;
2173
2174 if ((rt->dst.dev == dev || !dev) &&
2175 rt != adn->net->ipv6.ip6_null_entry)
2176 return -1;
2177
2178 return 0;
2179}
2180
2181void rt6_ifdown(struct net *net, struct net_device *dev)
2182{
2183 struct arg_dev_net adn = {
2184 .dev = dev,
2185 .net = net,
2186 };
2187
2188 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2189 icmp6_clean_all(fib6_ifdown, &adn);
2190}
2191
2192struct rt6_mtu_change_arg {
2193 struct net_device *dev;
2194 unsigned int mtu;
2195};
2196
2197static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2198{
2199 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2200 struct inet6_dev *idev;
2201
2202
2203
2204
2205
2206
2207
2208 idev = __in6_dev_get(arg->dev);
2209 if (!idev)
2210 return 0;
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226 if (rt->dst.dev == arg->dev &&
2227 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2228 (dst_mtu(&rt->dst) >= arg->mtu ||
2229 (dst_mtu(&rt->dst) < arg->mtu &&
2230 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2231 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2232 }
2233 return 0;
2234}
2235
2236void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2237{
2238 struct rt6_mtu_change_arg arg = {
2239 .dev = dev,
2240 .mtu = mtu,
2241 };
2242
2243 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2244}
2245
2246static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2247 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2248 [RTA_OIF] = { .type = NLA_U32 },
2249 [RTA_IIF] = { .type = NLA_U32 },
2250 [RTA_PRIORITY] = { .type = NLA_U32 },
2251 [RTA_METRICS] = { .type = NLA_NESTED },
2252};
2253
2254static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2255 struct fib6_config *cfg)
2256{
2257 struct rtmsg *rtm;
2258 struct nlattr *tb[RTA_MAX+1];
2259 int err;
2260
2261 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2262 if (err < 0)
2263 goto errout;
2264
2265 err = -EINVAL;
2266 rtm = nlmsg_data(nlh);
2267 memset(cfg, 0, sizeof(*cfg));
2268
2269 cfg->fc_table = rtm->rtm_table;
2270 cfg->fc_dst_len = rtm->rtm_dst_len;
2271 cfg->fc_src_len = rtm->rtm_src_len;
2272 cfg->fc_flags = RTF_UP;
2273 cfg->fc_protocol = rtm->rtm_protocol;
2274 cfg->fc_type = rtm->rtm_type;
2275
2276 if (rtm->rtm_type == RTN_UNREACHABLE ||
2277 rtm->rtm_type == RTN_BLACKHOLE ||
2278 rtm->rtm_type == RTN_PROHIBIT ||
2279 rtm->rtm_type == RTN_THROW)
2280 cfg->fc_flags |= RTF_REJECT;
2281
2282 if (rtm->rtm_type == RTN_LOCAL)
2283 cfg->fc_flags |= RTF_LOCAL;
2284
2285 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2286 cfg->fc_nlinfo.nlh = nlh;
2287 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2288
2289 if (tb[RTA_GATEWAY]) {
2290 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2291 cfg->fc_flags |= RTF_GATEWAY;
2292 }
2293
2294 if (tb[RTA_DST]) {
2295 int plen = (rtm->rtm_dst_len + 7) >> 3;
2296
2297 if (nla_len(tb[RTA_DST]) < plen)
2298 goto errout;
2299
2300 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2301 }
2302
2303 if (tb[RTA_SRC]) {
2304 int plen = (rtm->rtm_src_len + 7) >> 3;
2305
2306 if (nla_len(tb[RTA_SRC]) < plen)
2307 goto errout;
2308
2309 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2310 }
2311
2312 if (tb[RTA_PREFSRC])
2313 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2314
2315 if (tb[RTA_OIF])
2316 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2317
2318 if (tb[RTA_PRIORITY])
2319 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2320
2321 if (tb[RTA_METRICS]) {
2322 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2323 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2324 }
2325
2326 if (tb[RTA_TABLE])
2327 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328
2329 err = 0;
2330errout:
2331 return err;
2332}
2333
2334static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335{
2336 struct fib6_config cfg;
2337 int err;
2338
2339 err = rtm_to_fib6_config(skb, nlh, &cfg);
2340 if (err < 0)
2341 return err;
2342
2343 return ip6_route_del(&cfg);
2344}
2345
2346static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2347{
2348 struct fib6_config cfg;
2349 int err;
2350
2351 err = rtm_to_fib6_config(skb, nlh, &cfg);
2352 if (err < 0)
2353 return err;
2354
2355 return ip6_route_add(&cfg);
2356}
2357
2358static inline size_t rt6_nlmsg_size(void)
2359{
2360 return NLMSG_ALIGN(sizeof(struct rtmsg))
2361 + nla_total_size(16)
2362 + nla_total_size(16)
2363 + nla_total_size(16)
2364 + nla_total_size(16)
2365 + nla_total_size(4)
2366 + nla_total_size(4)
2367 + nla_total_size(4)
2368 + nla_total_size(4)
2369 + RTAX_MAX * nla_total_size(4)
2370 + nla_total_size(sizeof(struct rta_cacheinfo));
2371}
2372
2373static int rt6_fill_node(struct net *net,
2374 struct sk_buff *skb, struct rt6_info *rt,
2375 struct in6_addr *dst, struct in6_addr *src,
2376 int iif, int type, u32 portid, u32 seq,
2377 int prefix, int nowait, unsigned int flags)
2378{
2379 struct rtmsg *rtm;
2380 struct nlmsghdr *nlh;
2381 long expires;
2382 u32 table;
2383 struct neighbour *n;
2384
2385 if (prefix) {
2386 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2387
2388 return 1;
2389 }
2390 }
2391
2392 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2393 if (!nlh)
2394 return -EMSGSIZE;
2395
2396 rtm = nlmsg_data(nlh);
2397 rtm->rtm_family = AF_INET6;
2398 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2399 rtm->rtm_src_len = rt->rt6i_src.plen;
2400 rtm->rtm_tos = 0;
2401 if (rt->rt6i_table)
2402 table = rt->rt6i_table->tb6_id;
2403 else
2404 table = RT6_TABLE_UNSPEC;
2405 rtm->rtm_table = table;
2406 if (nla_put_u32(skb, RTA_TABLE, table))
2407 goto nla_put_failure;
2408 if (rt->rt6i_flags & RTF_REJECT) {
2409 switch (rt->dst.error) {
2410 case -EINVAL:
2411 rtm->rtm_type = RTN_BLACKHOLE;
2412 break;
2413 case -EACCES:
2414 rtm->rtm_type = RTN_PROHIBIT;
2415 break;
2416 case -EAGAIN:
2417 rtm->rtm_type = RTN_THROW;
2418 break;
2419 default:
2420 rtm->rtm_type = RTN_UNREACHABLE;
2421 break;
2422 }
2423 }
2424 else if (rt->rt6i_flags & RTF_LOCAL)
2425 rtm->rtm_type = RTN_LOCAL;
2426 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2427 rtm->rtm_type = RTN_LOCAL;
2428 else
2429 rtm->rtm_type = RTN_UNICAST;
2430 rtm->rtm_flags = 0;
2431 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2432 rtm->rtm_protocol = rt->rt6i_protocol;
2433 if (rt->rt6i_flags & RTF_DYNAMIC)
2434 rtm->rtm_protocol = RTPROT_REDIRECT;
2435 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2436 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2437 rtm->rtm_protocol = RTPROT_RA;
2438 else
2439 rtm->rtm_protocol = RTPROT_KERNEL;
2440 }
2441
2442 if (rt->rt6i_flags & RTF_CACHE)
2443 rtm->rtm_flags |= RTM_F_CLONED;
2444
2445 if (dst) {
2446 if (nla_put(skb, RTA_DST, 16, dst))
2447 goto nla_put_failure;
2448 rtm->rtm_dst_len = 128;
2449 } else if (rtm->rtm_dst_len)
2450 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2451 goto nla_put_failure;
2452#ifdef CONFIG_IPV6_SUBTREES
2453 if (src) {
2454 if (nla_put(skb, RTA_SRC, 16, src))
2455 goto nla_put_failure;
2456 rtm->rtm_src_len = 128;
2457 } else if (rtm->rtm_src_len &&
2458 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2459 goto nla_put_failure;
2460#endif
2461 if (iif) {
2462#ifdef CONFIG_IPV6_MROUTE
2463 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2464 int err = ip6mr_get_route(net, skb, rtm, nowait);
2465 if (err <= 0) {
2466 if (!nowait) {
2467 if (err == 0)
2468 return 0;
2469 goto nla_put_failure;
2470 } else {
2471 if (err == -EMSGSIZE)
2472 goto nla_put_failure;
2473 }
2474 }
2475 } else
2476#endif
2477 if (nla_put_u32(skb, RTA_IIF, iif))
2478 goto nla_put_failure;
2479 } else if (dst) {
2480 struct in6_addr saddr_buf;
2481 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2482 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2483 goto nla_put_failure;
2484 }
2485
2486 if (rt->rt6i_prefsrc.plen) {
2487 struct in6_addr saddr_buf;
2488 saddr_buf = rt->rt6i_prefsrc.addr;
2489 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490 goto nla_put_failure;
2491 }
2492
2493 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2494 goto nla_put_failure;
2495
2496 n = rt->n;
2497 if (n) {
2498 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2499 goto nla_put_failure;
2500 }
2501
2502 if (rt->dst.dev &&
2503 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2504 goto nla_put_failure;
2505 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2506 goto nla_put_failure;
2507
2508 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2509
2510 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2511 goto nla_put_failure;
2512
2513 return nlmsg_end(skb, nlh);
2514
2515nla_put_failure:
2516 nlmsg_cancel(skb, nlh);
2517 return -EMSGSIZE;
2518}
2519
2520int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2521{
2522 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2523 int prefix;
2524
2525 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2526 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2527 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2528 } else
2529 prefix = 0;
2530
2531 return rt6_fill_node(arg->net,
2532 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2533 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2534 prefix, 0, NLM_F_MULTI);
2535}
2536
2537static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2538{
2539 struct net *net = sock_net(in_skb->sk);
2540 struct nlattr *tb[RTA_MAX+1];
2541 struct rt6_info *rt;
2542 struct sk_buff *skb;
2543 struct rtmsg *rtm;
2544 struct flowi6 fl6;
2545 int err, iif = 0, oif = 0;
2546
2547 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2548 if (err < 0)
2549 goto errout;
2550
2551 err = -EINVAL;
2552 memset(&fl6, 0, sizeof(fl6));
2553
2554 if (tb[RTA_SRC]) {
2555 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2556 goto errout;
2557
2558 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2559 }
2560
2561 if (tb[RTA_DST]) {
2562 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2563 goto errout;
2564
2565 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2566 }
2567
2568 if (tb[RTA_IIF])
2569 iif = nla_get_u32(tb[RTA_IIF]);
2570
2571 if (tb[RTA_OIF])
2572 oif = nla_get_u32(tb[RTA_OIF]);
2573
2574 if (iif) {
2575 struct net_device *dev;
2576 int flags = 0;
2577
2578 dev = __dev_get_by_index(net, iif);
2579 if (!dev) {
2580 err = -ENODEV;
2581 goto errout;
2582 }
2583
2584 fl6.flowi6_iif = iif;
2585
2586 if (!ipv6_addr_any(&fl6.saddr))
2587 flags |= RT6_LOOKUP_F_HAS_SADDR;
2588
2589 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2590 flags);
2591 } else {
2592 fl6.flowi6_oif = oif;
2593
2594 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2595 }
2596
2597 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2598 if (!skb) {
2599 dst_release(&rt->dst);
2600 err = -ENOBUFS;
2601 goto errout;
2602 }
2603
2604
2605
2606
2607 skb_reset_mac_header(skb);
2608 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2609
2610 skb_dst_set(skb, &rt->dst);
2611
2612 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2613 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2614 nlh->nlmsg_seq, 0, 0, 0);
2615 if (err < 0) {
2616 kfree_skb(skb);
2617 goto errout;
2618 }
2619
2620 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2621errout:
2622 return err;
2623}
2624
2625void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2626{
2627 struct sk_buff *skb;
2628 struct net *net = info->nl_net;
2629 u32 seq;
2630 int err;
2631
2632 err = -ENOBUFS;
2633 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2634
2635 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2636 if (!skb)
2637 goto errout;
2638
2639 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2640 event, info->portid, seq, 0, 0, 0);
2641 if (err < 0) {
2642
2643 WARN_ON(err == -EMSGSIZE);
2644 kfree_skb(skb);
2645 goto errout;
2646 }
2647 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2648 info->nlh, gfp_any());
2649 return;
2650errout:
2651 if (err < 0)
2652 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2653}
2654
2655static int ip6_route_dev_notify(struct notifier_block *this,
2656 unsigned long event, void *data)
2657{
2658 struct net_device *dev = (struct net_device *)data;
2659 struct net *net = dev_net(dev);
2660
2661 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2662 net->ipv6.ip6_null_entry->dst.dev = dev;
2663 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2664#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2665 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2666 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2667 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2668 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2669#endif
2670 }
2671
2672 return NOTIFY_OK;
2673}
2674
2675
2676
2677
2678
2679#ifdef CONFIG_PROC_FS
2680
/*
 * Buffer bookkeeping record.
 *
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; the field meanings below are inferred from their names only
 * and should be confirmed (or the structure removed if it is dead).
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the output buffer */
	int offset;	/* presumably the requested start offset */
	int length;	/* presumably the requested length */
	int skip;	/* presumably bytes still to be skipped */
	int len;	/* presumably bytes produced so far */
};
2689
2690static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2691{
2692 struct seq_file *m = p_arg;
2693 struct neighbour *n;
2694
2695 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2696
2697#ifdef CONFIG_IPV6_SUBTREES
2698 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2699#else
2700 seq_puts(m, "00000000000000000000000000000000 00 ");
2701#endif
2702 n = rt->n;
2703 if (n) {
2704 seq_printf(m, "%pi6", n->primary_key);
2705 } else {
2706 seq_puts(m, "00000000000000000000000000000000");
2707 }
2708 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2709 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2710 rt->dst.__use, rt->rt6i_flags,
2711 rt->dst.dev ? rt->dst.dev->name : "");
2712 return 0;
2713}
2714
2715static int ipv6_route_show(struct seq_file *m, void *v)
2716{
2717 struct net *net = (struct net *)m->private;
2718 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2719 return 0;
2720}
2721
2722static int ipv6_route_open(struct inode *inode, struct file *file)
2723{
2724 return single_open_net(inode, file, ipv6_route_show);
2725}
2726
2727static const struct file_operations ipv6_route_proc_fops = {
2728 .owner = THIS_MODULE,
2729 .open = ipv6_route_open,
2730 .read = seq_read,
2731 .llseek = seq_lseek,
2732 .release = single_release_net,
2733};
2734
2735static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2736{
2737 struct net *net = (struct net *)seq->private;
2738 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2739 net->ipv6.rt6_stats->fib_nodes,
2740 net->ipv6.rt6_stats->fib_route_nodes,
2741 net->ipv6.rt6_stats->fib_rt_alloc,
2742 net->ipv6.rt6_stats->fib_rt_entries,
2743 net->ipv6.rt6_stats->fib_rt_cache,
2744 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2745 net->ipv6.rt6_stats->fib_discarded_routes);
2746
2747 return 0;
2748}
2749
2750static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2751{
2752 return single_open_net(inode, file, rt6_stats_seq_show);
2753}
2754
2755static const struct file_operations rt6_stats_seq_fops = {
2756 .owner = THIS_MODULE,
2757 .open = rt6_stats_seq_open,
2758 .read = seq_read,
2759 .llseek = seq_lseek,
2760 .release = single_release_net,
2761};
2762#endif
2763
2764#ifdef CONFIG_SYSCTL
2765
2766static
2767int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2768 void __user *buffer, size_t *lenp, loff_t *ppos)
2769{
2770 struct net *net;
2771 int delay;
2772 if (!write)
2773 return -EINVAL;
2774
2775 net = (struct net *)ctl->extra1;
2776 delay = net->ipv6.sysctl.flush_delay;
2777 proc_dointvec(ctl, write, buffer, lenp, ppos);
2778 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2779 return 0;
2780}
2781
2782ctl_table ipv6_route_table_template[] = {
2783 {
2784 .procname = "flush",
2785 .data = &init_net.ipv6.sysctl.flush_delay,
2786 .maxlen = sizeof(int),
2787 .mode = 0200,
2788 .proc_handler = ipv6_sysctl_rtcache_flush
2789 },
2790 {
2791 .procname = "gc_thresh",
2792 .data = &ip6_dst_ops_template.gc_thresh,
2793 .maxlen = sizeof(int),
2794 .mode = 0644,
2795 .proc_handler = proc_dointvec,
2796 },
2797 {
2798 .procname = "max_size",
2799 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2800 .maxlen = sizeof(int),
2801 .mode = 0644,
2802 .proc_handler = proc_dointvec,
2803 },
2804 {
2805 .procname = "gc_min_interval",
2806 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2807 .maxlen = sizeof(int),
2808 .mode = 0644,
2809 .proc_handler = proc_dointvec_jiffies,
2810 },
2811 {
2812 .procname = "gc_timeout",
2813 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2814 .maxlen = sizeof(int),
2815 .mode = 0644,
2816 .proc_handler = proc_dointvec_jiffies,
2817 },
2818 {
2819 .procname = "gc_interval",
2820 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2821 .maxlen = sizeof(int),
2822 .mode = 0644,
2823 .proc_handler = proc_dointvec_jiffies,
2824 },
2825 {
2826 .procname = "gc_elasticity",
2827 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2828 .maxlen = sizeof(int),
2829 .mode = 0644,
2830 .proc_handler = proc_dointvec,
2831 },
2832 {
2833 .procname = "mtu_expires",
2834 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2835 .maxlen = sizeof(int),
2836 .mode = 0644,
2837 .proc_handler = proc_dointvec_jiffies,
2838 },
2839 {
2840 .procname = "min_adv_mss",
2841 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2842 .maxlen = sizeof(int),
2843 .mode = 0644,
2844 .proc_handler = proc_dointvec,
2845 },
2846 {
2847 .procname = "gc_min_interval_ms",
2848 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2849 .maxlen = sizeof(int),
2850 .mode = 0644,
2851 .proc_handler = proc_dointvec_ms_jiffies,
2852 },
2853 { }
2854};
2855
2856struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2857{
2858 struct ctl_table *table;
2859
2860 table = kmemdup(ipv6_route_table_template,
2861 sizeof(ipv6_route_table_template),
2862 GFP_KERNEL);
2863
2864 if (table) {
2865 table[0].data = &net->ipv6.sysctl.flush_delay;
2866 table[0].extra1 = net;
2867 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2868 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2869 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2870 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2871 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2872 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2873 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2874 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2875 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2876 }
2877
2878 return table;
2879}
2880#endif
2881
2882static int __net_init ip6_route_net_init(struct net *net)
2883{
2884 int ret = -ENOMEM;
2885
2886 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2887 sizeof(net->ipv6.ip6_dst_ops));
2888
2889 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2890 goto out_ip6_dst_ops;
2891
2892 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2893 sizeof(*net->ipv6.ip6_null_entry),
2894 GFP_KERNEL);
2895 if (!net->ipv6.ip6_null_entry)
2896 goto out_ip6_dst_entries;
2897 net->ipv6.ip6_null_entry->dst.path =
2898 (struct dst_entry *)net->ipv6.ip6_null_entry;
2899 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2900 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2901 ip6_template_metrics, true);
2902
2903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2905 sizeof(*net->ipv6.ip6_prohibit_entry),
2906 GFP_KERNEL);
2907 if (!net->ipv6.ip6_prohibit_entry)
2908 goto out_ip6_null_entry;
2909 net->ipv6.ip6_prohibit_entry->dst.path =
2910 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2911 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2912 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2913 ip6_template_metrics, true);
2914
2915 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2916 sizeof(*net->ipv6.ip6_blk_hole_entry),
2917 GFP_KERNEL);
2918 if (!net->ipv6.ip6_blk_hole_entry)
2919 goto out_ip6_prohibit_entry;
2920 net->ipv6.ip6_blk_hole_entry->dst.path =
2921 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2922 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2923 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2924 ip6_template_metrics, true);
2925#endif
2926
2927 net->ipv6.sysctl.flush_delay = 0;
2928 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2929 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2930 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2931 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2932 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2933 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2934 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2935
2936 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2937
2938 ret = 0;
2939out:
2940 return ret;
2941
2942#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2943out_ip6_prohibit_entry:
2944 kfree(net->ipv6.ip6_prohibit_entry);
2945out_ip6_null_entry:
2946 kfree(net->ipv6.ip6_null_entry);
2947#endif
2948out_ip6_dst_entries:
2949 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2950out_ip6_dst_ops:
2951 goto out;
2952}
2953
2954static void __net_exit ip6_route_net_exit(struct net *net)
2955{
2956 kfree(net->ipv6.ip6_null_entry);
2957#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2958 kfree(net->ipv6.ip6_prohibit_entry);
2959 kfree(net->ipv6.ip6_blk_hole_entry);
2960#endif
2961 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2962}
2963
2964static int __net_init ip6_route_net_init_late(struct net *net)
2965{
2966#ifdef CONFIG_PROC_FS
2967 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2968 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2969#endif
2970 return 0;
2971}
2972
2973static void __net_exit ip6_route_net_exit_late(struct net *net)
2974{
2975#ifdef CONFIG_PROC_FS
2976 proc_net_remove(net, "ipv6_route");
2977 proc_net_remove(net, "rt6_stats");
2978#endif
2979}
2980
2981static struct pernet_operations ip6_route_net_ops = {
2982 .init = ip6_route_net_init,
2983 .exit = ip6_route_net_exit,
2984};
2985
2986static int __net_init ipv6_inetpeer_init(struct net *net)
2987{
2988 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2989
2990 if (!bp)
2991 return -ENOMEM;
2992 inet_peer_base_init(bp);
2993 net->ipv6.peers = bp;
2994 return 0;
2995}
2996
2997static void __net_exit ipv6_inetpeer_exit(struct net *net)
2998{
2999 struct inet_peer_base *bp = net->ipv6.peers;
3000
3001 net->ipv6.peers = NULL;
3002 inetpeer_invalidate_tree(bp);
3003 kfree(bp);
3004}
3005
3006static struct pernet_operations ipv6_inetpeer_ops = {
3007 .init = ipv6_inetpeer_init,
3008 .exit = ipv6_inetpeer_exit,
3009};
3010
3011static struct pernet_operations ip6_route_net_late_ops = {
3012 .init = ip6_route_net_init_late,
3013 .exit = ip6_route_net_exit_late,
3014};
3015
3016static struct notifier_block ip6_route_dev_notifier = {
3017 .notifier_call = ip6_route_dev_notify,
3018 .priority = 0,
3019};
3020
3021int __init ip6_route_init(void)
3022{
3023 int ret;
3024
3025 ret = -ENOMEM;
3026 ip6_dst_ops_template.kmem_cachep =
3027 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3028 SLAB_HWCACHE_ALIGN, NULL);
3029 if (!ip6_dst_ops_template.kmem_cachep)
3030 goto out;
3031
3032 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3033 if (ret)
3034 goto out_kmem_cache;
3035
3036 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3037 if (ret)
3038 goto out_dst_entries;
3039
3040 ret = register_pernet_subsys(&ip6_route_net_ops);
3041 if (ret)
3042 goto out_register_inetpeer;
3043
3044 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045
3046
3047
3048
3049 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3050 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3053 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3055 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056 #endif
3057 ret = fib6_init();
3058 if (ret)
3059 goto out_register_subsys;
3060
3061 ret = xfrm6_init();
3062 if (ret)
3063 goto out_fib6_init;
3064
3065 ret = fib6_rules_init();
3066 if (ret)
3067 goto xfrm6_init;
3068
3069 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070 if (ret)
3071 goto fib6_rules_init;
3072
3073 ret = -ENOBUFS;
3074 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3077 goto out_register_late_subsys;
3078
3079 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3080 if (ret)
3081 goto out_register_late_subsys;
3082
3083out:
3084 return ret;
3085
3086out_register_late_subsys:
3087 unregister_pernet_subsys(&ip6_route_net_late_ops);
3088fib6_rules_init:
3089 fib6_rules_cleanup();
3090xfrm6_init:
3091 xfrm6_fini();
3092out_fib6_init:
3093 fib6_gc_cleanup();
3094out_register_subsys:
3095 unregister_pernet_subsys(&ip6_route_net_ops);
3096out_register_inetpeer:
3097 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3098out_dst_entries:
3099 dst_entries_destroy(&ip6_dst_blackhole_ops);
3100out_kmem_cache:
3101 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3102 goto out;
3103}
3104
3105void ip6_route_cleanup(void)
3106{
3107 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3108 unregister_pernet_subsys(&ip6_route_net_late_ops);
3109 fib6_rules_cleanup();
3110 xfrm6_fini();
3111 fib6_gc_cleanup();
3112 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3113 unregister_pernet_subsys(&ip6_route_net_ops);
3114 dst_entries_destroy(&ip6_dst_blackhole_ops);
3115 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3116}
3117