1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27#define pr_fmt(fmt) "IPv6: " fmt
28
29#include <linux/capability.h>
30#include <linux/errno.h>
31#include <linux/export.h>
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
40#include <linux/mroute6.h>
41#include <linux/init.h>
42#include <linux/if_arp.h>
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#include <linux/nsproxy.h>
46#include <linux/slab.h>
47#include <net/net_namespace.h>
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
58#include <net/netevent.h>
59#include <net/netlink.h>
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71static unsigned int ip6_mtu(const struct dst_entry *dst);
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76static int ip6_dst_gc(struct dst_ops *ops);
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
85
86#ifdef CONFIG_IPV6_ROUTE_INFO
87static struct rt6_info *rt6_add_route_info(struct net *net,
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
90 unsigned int pref);
91static struct rt6_info *rt6_get_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
94#endif
95
96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97{
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
105 peer = rt6_get_peer_create(rt);
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
129{
130 struct in6_addr *p = &rt->rt6i_gateway;
131
132 if (!ipv6_addr_any(p))
133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
136 return daddr;
137}
138
139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
142{
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
146 daddr = choose_neigh_daddr(rt, skb, daddr);
147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151}
152
153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154{
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
161 rt->n = n;
162
163 return 0;
164}
165
166static struct dst_ops ip6_dst_ops_template = {
167 .family = AF_INET6,
168 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
172 .default_advmss = ip6_default_advmss,
173 .mtu = ip6_mtu,
174 .cow_metrics = ipv6_cow_metrics,
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
180 .redirect = rt6_do_redirect,
181 .local_out = __ip6_local_out,
182 .neigh_lookup = ip6_neigh_lookup,
183};
184
185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186{
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
190}
191
192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
194{
195}
196
/* redirect hook of ip6_dst_blackhole_ops: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
201
202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204{
205 return NULL;
206}
207
208static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
210 .protocol = cpu_to_be16(ETH_P_IPV6),
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
213 .mtu = ip6_blackhole_mtu,
214 .default_advmss = ip6_default_advmss,
215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
216 .redirect = ip6_rt_blackhole_redirect,
217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
218 .neigh_lookup = ip6_neigh_lookup,
219};
220
221static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 0,
223};
224
225static struct rt6_info ip6_null_entry_template = {
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = DST_OBSOLETE_FORCE_CHK,
230 .error = -ENETUNREACH,
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
235 .rt6i_protocol = RTPROT_KERNEL,
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238};
239
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst error is
 * -EACCES and whose handlers are the ip6_pkt_prohibit pair.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.error		= -EACCES,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
	},
};

/*
 * Template for the "blackhole" route: a reject entry whose dst error is
 * -EINVAL and whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.dst = {
		.input		= dst_discard,
		.output		= dst_discard,
		.error		= -EINVAL,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
	},
};

#endif
276
277
278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 struct net_device *dev,
280 int flags,
281 struct fib6_table *table)
282{
283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_FORCE_CHK, flags);
285
286 if (rt) {
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 rt->rt6i_genid = rt_genid(net);
292 }
293 return rt;
294}
295
296static void ip6_dst_destroy(struct dst_entry *dst)
297{
298 struct rt6_info *rt = (struct rt6_info *)dst;
299 struct inet6_dev *idev = rt->rt6i_idev;
300
301 if (rt->n)
302 neigh_release(rt->n);
303
304 if (!(rt->dst.flags & DST_HOST))
305 dst_destroy_metrics_generic(dst);
306
307 if (idev) {
308 rt->rt6i_idev = NULL;
309 in6_dev_put(idev);
310 }
311
312 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
313 dst_release(dst->from);
314
315 if (rt6_has_peer(rt)) {
316 struct inet_peer *peer = rt6_peer_ptr(rt);
317 inet_putpeer(peer);
318 }
319}
320
321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
322
323static u32 rt6_peer_genid(void)
324{
325 return atomic_read(&__rt6_peer_genid);
326}
327
328void rt6_bind_peer(struct rt6_info *rt, int create)
329{
330 struct inet_peer_base *base;
331 struct inet_peer *peer;
332
333 base = inetpeer_base_ptr(rt->_rt6i_peer);
334 if (!base)
335 return;
336
337 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
338 if (peer) {
339 if (!rt6_set_peer(rt, peer))
340 inet_putpeer(peer);
341 else
342 rt->rt6i_peer_genid = rt6_peer_genid();
343 }
344}
345
346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347 int how)
348{
349 struct rt6_info *rt = (struct rt6_info *)dst;
350 struct inet6_dev *idev = rt->rt6i_idev;
351 struct net_device *loopback_dev =
352 dev_net(dev)->loopback_dev;
353
354 if (dev != loopback_dev) {
355 if (idev && idev->dev == dev) {
356 struct inet6_dev *loopback_idev =
357 in6_dev_get(loopback_dev);
358 if (loopback_idev) {
359 rt->rt6i_idev = loopback_idev;
360 in6_dev_put(idev);
361 }
362 }
363 if (rt->n && rt->n->dev == dev) {
364 rt->n->dev = loopback_dev;
365 dev_hold(loopback_dev);
366 dev_put(dev);
367 }
368 }
369}
370
371static bool rt6_check_expired(const struct rt6_info *rt)
372{
373 struct rt6_info *ort = NULL;
374
375 if (rt->rt6i_flags & RTF_EXPIRES) {
376 if (time_after(jiffies, rt->dst.expires))
377 return true;
378 } else if (rt->dst.from) {
379 ort = (struct rt6_info *) rt->dst.from;
380 return (ort->rt6i_flags & RTF_EXPIRES) &&
381 time_after(jiffies, ort->dst.expires);
382 }
383 return false;
384}
385
386static bool rt6_need_strict(const struct in6_addr *daddr)
387{
388 return ipv6_addr_type(daddr) &
389 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
390}
391
392
393
394
395
396static inline struct rt6_info *rt6_device_match(struct net *net,
397 struct rt6_info *rt,
398 const struct in6_addr *saddr,
399 int oif,
400 int flags)
401{
402 struct rt6_info *local = NULL;
403 struct rt6_info *sprt;
404
405 if (!oif && ipv6_addr_any(saddr))
406 goto out;
407
408 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
409 struct net_device *dev = sprt->dst.dev;
410
411 if (oif) {
412 if (dev->ifindex == oif)
413 return sprt;
414 if (dev->flags & IFF_LOOPBACK) {
415 if (!sprt->rt6i_idev ||
416 sprt->rt6i_idev->dev->ifindex != oif) {
417 if (flags & RT6_LOOKUP_F_IFACE && oif)
418 continue;
419 if (local && (!oif ||
420 local->rt6i_idev->dev->ifindex == oif))
421 continue;
422 }
423 local = sprt;
424 }
425 } else {
426 if (ipv6_chk_addr(net, saddr, dev,
427 flags & RT6_LOOKUP_F_IFACE))
428 return sprt;
429 }
430 }
431
432 if (oif) {
433 if (local)
434 return local;
435
436 if (flags & RT6_LOOKUP_F_IFACE)
437 return net->ipv6.ip6_null_entry;
438 }
439out:
440 return rt;
441}
442
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a neighbour solicitation for the route's neighbour when that
 * neighbour is not in a NUD_VALID state and its last update is older than
 * the idev's rtr_probe_interval.  Accepts a NULL @rt.
 *
 * The neighbour pointer is read under rcu_read_lock(); the state re-check
 * and the timestamp update are done under neigh->lock, and the solicitation
 * itself is sent after that lock has been released.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool solicit = false;

	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (neigh && !(neigh->nud_state & NUD_VALID)) {
		read_lock_bh(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
			neigh->updated = jiffies;
			solicit = true;
		}
		read_unlock_bh(&neigh->lock);

		if (solicit) {
			struct in6_addr mcaddr;
			struct in6_addr *target;

			target = (struct in6_addr *)&neigh->primary_key;
			addrconf_addr_solict_mult(target, &mcaddr);
			ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
		}
	}
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
482
483
484
485
486static inline int rt6_check_dev(struct rt6_info *rt, int oif)
487{
488 struct net_device *dev = rt->dst.dev;
489 if (!oif || dev->ifindex == oif)
490 return 2;
491 if ((dev->flags & IFF_LOOPBACK) &&
492 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
493 return 1;
494 return 0;
495}
496
497static inline int rt6_check_neigh(struct rt6_info *rt)
498{
499 struct neighbour *neigh;
500 int m;
501
502 rcu_read_lock();
503 neigh = rt->n;
504 if (rt->rt6i_flags & RTF_NONEXTHOP ||
505 !(rt->rt6i_flags & RTF_GATEWAY))
506 m = 1;
507 else if (neigh) {
508 read_lock_bh(&neigh->lock);
509 if (neigh->nud_state & NUD_VALID)
510 m = 2;
511#ifdef CONFIG_IPV6_ROUTER_PREF
512 else if (neigh->nud_state & NUD_FAILED)
513 m = 0;
514#endif
515 else
516 m = 1;
517 read_unlock_bh(&neigh->lock);
518 } else
519 m = 0;
520 rcu_read_unlock();
521 return m;
522}
523
524static int rt6_score_route(struct rt6_info *rt, int oif,
525 int strict)
526{
527 int m, n;
528
529 m = rt6_check_dev(rt, oif);
530 if (!m && (strict & RT6_LOOKUP_F_IFACE))
531 return -1;
532#ifdef CONFIG_IPV6_ROUTER_PREF
533 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
534#endif
535 n = rt6_check_neigh(rt);
536 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
537 return -1;
538 return m;
539}
540
541static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
542 int *mpri, struct rt6_info *match)
543{
544 int m;
545
546 if (rt6_check_expired(rt))
547 goto out;
548
549 m = rt6_score_route(rt, oif, strict);
550 if (m < 0)
551 goto out;
552
553 if (m > *mpri) {
554 if (strict & RT6_LOOKUP_F_REACHABLE)
555 rt6_probe(match);
556 *mpri = m;
557 match = rt;
558 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
559 rt6_probe(rt);
560 }
561
562out:
563 return match;
564}
565
566static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
567 struct rt6_info *rr_head,
568 u32 metric, int oif, int strict)
569{
570 struct rt6_info *rt, *match;
571 int mpri = -1;
572
573 match = NULL;
574 for (rt = rr_head; rt && rt->rt6i_metric == metric;
575 rt = rt->dst.rt6_next)
576 match = find_match(rt, oif, strict, &mpri, match);
577 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
578 rt = rt->dst.rt6_next)
579 match = find_match(rt, oif, strict, &mpri, match);
580
581 return match;
582}
583
584static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
585{
586 struct rt6_info *match, *rt0;
587 struct net *net;
588
589 rt0 = fn->rr_ptr;
590 if (!rt0)
591 fn->rr_ptr = rt0 = fn->leaf;
592
593 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
594
595 if (!match &&
596 (strict & RT6_LOOKUP_F_REACHABLE)) {
597 struct rt6_info *next = rt0->dst.rt6_next;
598
599
600 if (!next || next->rt6i_metric != rt0->rt6i_metric)
601 next = fn->leaf;
602
603 if (next != rt0)
604 fn->rr_ptr = next;
605 }
606
607 net = dev_net(rt0->dst.dev);
608 return match ? match : net->ipv6.ip6_null_entry;
609}
610
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway to associate with the route
 *
 * Returns -EINVAL when the option is too short, its length/prefix_len
 * fields are inconsistent, or the preference is
 * ICMPV6_ROUTER_PREF_INVALID; 0 otherwise.
 *
 * A zero lifetime deletes an existing route; a non-zero lifetime creates
 * the route when it is missing or refreshes the preference bits of an
 * existing one, and then (re)sets its expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity checks on the option length versus the prefix length. */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* Shorter option: build the prefix in a local buffer. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
683
/*
 * BACKTRACK - climb the fib6 tree after a lookup ended on the null entry.
 *
 * Relies on the expanding function providing: variables "rt" and "fn",
 * and labels "out" and "restart".  While rt is the namespace's null entry
 * the macro moves fn towards the root (descending into a parent's subtree
 * via fib6_lookup() when that subtree is not where we came from), jumping
 * to "restart" at the first node flagged RTN_RTINFO and to "out" once the
 * node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *parent;				\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			parent = fn->parent;				\
			if (FIB6_SUBTREE(parent) &&			\
			    FIB6_SUBTREE(parent) != fn)			\
				fn = fib6_lookup(FIB6_SUBTREE(parent),	\
						 NULL, saddr);		\
			else						\
				fn = parent;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
701
702static struct rt6_info *ip6_pol_route_lookup(struct net *net,
703 struct fib6_table *table,
704 struct flowi6 *fl6, int flags)
705{
706 struct fib6_node *fn;
707 struct rt6_info *rt;
708
709 read_lock_bh(&table->tb6_lock);
710 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
711restart:
712 rt = fn->leaf;
713 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
714 BACKTRACK(net, &fl6->saddr);
715out:
716 dst_use(&rt->dst, jiffies);
717 read_unlock_bh(&table->tb6_lock);
718 return rt;
719
720}
721
722struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
723 int flags)
724{
725 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
726}
727EXPORT_SYMBOL_GPL(ip6_route_lookup);
728
729struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
730 const struct in6_addr *saddr, int oif, int strict)
731{
732 struct flowi6 fl6 = {
733 .flowi6_oif = oif,
734 .daddr = *daddr,
735 };
736 struct dst_entry *dst;
737 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
738
739 if (saddr) {
740 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
741 flags |= RT6_LOOKUP_F_HAS_SADDR;
742 }
743
744 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
745 if (dst->error == 0)
746 return (struct rt6_info *) dst;
747
748 dst_release(dst);
749
750 return NULL;
751}
752
753EXPORT_SYMBOL(rt6_lookup);
754
755
756
757
758
759
760
761static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
762{
763 int err;
764 struct fib6_table *table;
765
766 table = rt->rt6i_table;
767 write_lock_bh(&table->tb6_lock);
768 err = fib6_add(&table->tb6_root, rt, info);
769 write_unlock_bh(&table->tb6_lock);
770
771 return err;
772}
773
774int ip6_ins_rt(struct rt6_info *rt)
775{
776 struct nl_info info = {
777 .nl_net = dev_net(rt->dst.dev),
778 };
779 return __ip6_ins_rt(rt, &info);
780}
781
782static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
783 const struct in6_addr *daddr,
784 const struct in6_addr *saddr)
785{
786 struct rt6_info *rt;
787
788
789
790
791
792 rt = ip6_rt_copy(ort, daddr);
793
794 if (rt) {
795 int attempts = !in_softirq();
796
797 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
798 if (ort->rt6i_dst.plen != 128 &&
799 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
800 rt->rt6i_flags |= RTF_ANYCAST;
801 rt->rt6i_gateway = *daddr;
802 }
803
804 rt->rt6i_flags |= RTF_CACHE;
805
806#ifdef CONFIG_IPV6_SUBTREES
807 if (rt->rt6i_src.plen && saddr) {
808 rt->rt6i_src.addr = *saddr;
809 rt->rt6i_src.plen = 128;
810 }
811#endif
812
813 retry:
814 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
815 struct net *net = dev_net(rt->dst.dev);
816 int saved_rt_min_interval =
817 net->ipv6.sysctl.ip6_rt_gc_min_interval;
818 int saved_rt_elasticity =
819 net->ipv6.sysctl.ip6_rt_gc_elasticity;
820
821 if (attempts-- > 0) {
822 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
823 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
824
825 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
826
827 net->ipv6.sysctl.ip6_rt_gc_elasticity =
828 saved_rt_elasticity;
829 net->ipv6.sysctl.ip6_rt_gc_min_interval =
830 saved_rt_min_interval;
831 goto retry;
832 }
833
834 net_warn_ratelimited("Neighbour table overflow\n");
835 dst_free(&rt->dst);
836 return NULL;
837 }
838 }
839
840 return rt;
841}
842
843static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
844 const struct in6_addr *daddr)
845{
846 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
847
848 if (rt) {
849 rt->rt6i_flags |= RTF_CACHE;
850 rt->n = neigh_clone(ort->n);
851 }
852 return rt;
853}
854
855static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
856 struct flowi6 *fl6, int flags)
857{
858 struct fib6_node *fn;
859 struct rt6_info *rt, *nrt;
860 int strict = 0;
861 int attempts = 3;
862 int err;
863 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
864
865 strict |= flags & RT6_LOOKUP_F_IFACE;
866
867relookup:
868 read_lock_bh(&table->tb6_lock);
869
870restart_2:
871 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
872
873restart:
874 rt = rt6_select(fn, oif, strict | reachable);
875
876 BACKTRACK(net, &fl6->saddr);
877 if (rt == net->ipv6.ip6_null_entry ||
878 rt->rt6i_flags & RTF_CACHE)
879 goto out;
880
881 dst_hold(&rt->dst);
882 read_unlock_bh(&table->tb6_lock);
883
884 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
885 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
886 else if (!(rt->dst.flags & DST_HOST))
887 nrt = rt6_alloc_clone(rt, &fl6->daddr);
888 else
889 goto out2;
890
891 dst_release(&rt->dst);
892 rt = nrt ? : net->ipv6.ip6_null_entry;
893
894 dst_hold(&rt->dst);
895 if (nrt) {
896 err = ip6_ins_rt(nrt);
897 if (!err)
898 goto out2;
899 }
900
901 if (--attempts <= 0)
902 goto out2;
903
904
905
906
907
908 dst_release(&rt->dst);
909 goto relookup;
910
911out:
912 if (reachable) {
913 reachable = 0;
914 goto restart_2;
915 }
916 dst_hold(&rt->dst);
917 read_unlock_bh(&table->tb6_lock);
918out2:
919 rt->dst.lastuse = jiffies;
920 rt->dst.__use++;
921
922 return rt;
923}
924
925static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
926 struct flowi6 *fl6, int flags)
927{
928 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
929}
930
931static struct dst_entry *ip6_route_input_lookup(struct net *net,
932 struct net_device *dev,
933 struct flowi6 *fl6, int flags)
934{
935 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
936 flags |= RT6_LOOKUP_F_IFACE;
937
938 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
939}
940
941void ip6_route_input(struct sk_buff *skb)
942{
943 const struct ipv6hdr *iph = ipv6_hdr(skb);
944 struct net *net = dev_net(skb->dev);
945 int flags = RT6_LOOKUP_F_HAS_SADDR;
946 struct flowi6 fl6 = {
947 .flowi6_iif = skb->dev->ifindex,
948 .daddr = iph->daddr,
949 .saddr = iph->saddr,
950 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
951 .flowi6_mark = skb->mark,
952 .flowi6_proto = iph->nexthdr,
953 };
954
955 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
956}
957
958static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
959 struct flowi6 *fl6, int flags)
960{
961 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
962}
963
964struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
965 struct flowi6 *fl6)
966{
967 int flags = 0;
968
969 fl6->flowi6_iif = net->loopback_dev->ifindex;
970
971 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
972 flags |= RT6_LOOKUP_F_IFACE;
973
974 if (!ipv6_addr_any(&fl6->saddr))
975 flags |= RT6_LOOKUP_F_HAS_SADDR;
976 else if (sk)
977 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
978
979 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
980}
981
982EXPORT_SYMBOL(ip6_route_output);
983
984struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
985{
986 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
987 struct dst_entry *new = NULL;
988
989 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
990 if (rt) {
991 new = &rt->dst;
992
993 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
994 rt6_init_peer(rt, net->ipv6.peers);
995
996 new->__use = 1;
997 new->input = dst_discard;
998 new->output = dst_discard;
999
1000 if (dst_metrics_read_only(&ort->dst))
1001 new->_metrics = ort->dst._metrics;
1002 else
1003 dst_copy_metrics(new, &ort->dst);
1004 rt->rt6i_idev = ort->rt6i_idev;
1005 if (rt->rt6i_idev)
1006 in6_dev_hold(rt->rt6i_idev);
1007
1008 rt->rt6i_gateway = ort->rt6i_gateway;
1009 rt->rt6i_flags = ort->rt6i_flags;
1010 rt6_clean_expires(rt);
1011 rt->rt6i_metric = 0;
1012
1013 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1014#ifdef CONFIG_IPV6_SUBTREES
1015 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1016#endif
1017
1018 dst_free(new);
1019 }
1020
1021 dst_release(dst_orig);
1022 return new ? new : ERR_PTR(-ENOMEM);
1023}
1024
1025
1026
1027
1028
1029static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030{
1031 struct rt6_info *rt;
1032
1033 rt = (struct rt6_info *) dst;
1034
1035
1036
1037
1038
1039 if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1040 return NULL;
1041
1042 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1043 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1044 if (!rt6_has_peer(rt))
1045 rt6_bind_peer(rt, 0);
1046 rt->rt6i_peer_genid = rt6_peer_genid();
1047 }
1048 return dst;
1049 }
1050 return NULL;
1051}
1052
1053static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1054{
1055 struct rt6_info *rt = (struct rt6_info *) dst;
1056
1057 if (rt) {
1058 if (rt->rt6i_flags & RTF_CACHE) {
1059 if (rt6_check_expired(rt)) {
1060 ip6_del_rt(rt);
1061 dst = NULL;
1062 }
1063 } else {
1064 dst_release(dst);
1065 dst = NULL;
1066 }
1067 }
1068 return dst;
1069}
1070
1071static void ip6_link_failure(struct sk_buff *skb)
1072{
1073 struct rt6_info *rt;
1074
1075 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1076
1077 rt = (struct rt6_info *) skb_dst(skb);
1078 if (rt) {
1079 if (rt->rt6i_flags & RTF_CACHE)
1080 rt6_update_expires(rt, 0);
1081 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1082 rt->rt6i_node->fn_sernum = -1;
1083 }
1084}
1085
1086static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1087 struct sk_buff *skb, u32 mtu)
1088{
1089 struct rt6_info *rt6 = (struct rt6_info*)dst;
1090
1091 dst_confirm(dst);
1092 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1093 struct net *net = dev_net(dst->dev);
1094
1095 rt6->rt6i_flags |= RTF_MODIFIED;
1096 if (mtu < IPV6_MIN_MTU) {
1097 u32 features = dst_metric(dst, RTAX_FEATURES);
1098 mtu = IPV6_MIN_MTU;
1099 features |= RTAX_FEATURE_ALLFRAG;
1100 dst_metric_set(dst, RTAX_FEATURES, features);
1101 }
1102 dst_metric_set(dst, RTAX_MTU, mtu);
1103 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1104 }
1105}
1106
1107void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108 int oif, u32 mark)
1109{
1110 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1111 struct dst_entry *dst;
1112 struct flowi6 fl6;
1113
1114 memset(&fl6, 0, sizeof(fl6));
1115 fl6.flowi6_oif = oif;
1116 fl6.flowi6_mark = mark;
1117 fl6.flowi6_flags = 0;
1118 fl6.daddr = iph->daddr;
1119 fl6.saddr = iph->saddr;
1120 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1121
1122 dst = ip6_route_output(net, NULL, &fl6);
1123 if (!dst->error)
1124 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1125 dst_release(dst);
1126}
1127EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1128
1129void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1130{
1131 ip6_update_pmtu(skb, sock_net(sk), mtu,
1132 sk->sk_bound_dev_if, sk->sk_mark);
1133}
1134EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1135
1136void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1137{
1138 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1139 struct dst_entry *dst;
1140 struct flowi6 fl6;
1141
1142 memset(&fl6, 0, sizeof(fl6));
1143 fl6.flowi6_oif = oif;
1144 fl6.flowi6_mark = mark;
1145 fl6.flowi6_flags = 0;
1146 fl6.daddr = iph->daddr;
1147 fl6.saddr = iph->saddr;
1148 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1149
1150 dst = ip6_route_output(net, NULL, &fl6);
1151 if (!dst->error)
1152 rt6_do_redirect(dst, NULL, skb);
1153 dst_release(dst);
1154}
1155EXPORT_SYMBOL_GPL(ip6_redirect);
1156
1157void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158{
1159 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1160}
1161EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1162
1163static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1164{
1165 struct net_device *dev = dst->dev;
1166 unsigned int mtu = dst_mtu(dst);
1167 struct net *net = dev_net(dev);
1168
1169 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1170
1171 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1172 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1173
1174
1175
1176
1177
1178
1179
1180 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1181 mtu = IPV6_MAXPLEN;
1182 return mtu;
1183}
1184
1185static unsigned int ip6_mtu(const struct dst_entry *dst)
1186{
1187 struct inet6_dev *idev;
1188 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1189
1190 if (mtu)
1191 return mtu;
1192
1193 mtu = IPV6_MIN_MTU;
1194
1195 rcu_read_lock();
1196 idev = __in6_dev_get(dst->dev);
1197 if (idev)
1198 mtu = idev->cnf.mtu6;
1199 rcu_read_unlock();
1200
1201 return mtu;
1202}
1203
1204static struct dst_entry *icmp6_dst_gc_list;
1205static DEFINE_SPINLOCK(icmp6_dst_lock);
1206
1207struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1208 struct neighbour *neigh,
1209 struct flowi6 *fl6)
1210{
1211 struct dst_entry *dst;
1212 struct rt6_info *rt;
1213 struct inet6_dev *idev = in6_dev_get(dev);
1214 struct net *net = dev_net(dev);
1215
1216 if (unlikely(!idev))
1217 return ERR_PTR(-ENODEV);
1218
1219 rt = ip6_dst_alloc(net, dev, 0, NULL);
1220 if (unlikely(!rt)) {
1221 in6_dev_put(idev);
1222 dst = ERR_PTR(-ENOMEM);
1223 goto out;
1224 }
1225
1226 if (neigh)
1227 neigh_hold(neigh);
1228 else {
1229 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1230 if (IS_ERR(neigh)) {
1231 in6_dev_put(idev);
1232 dst_free(&rt->dst);
1233 return ERR_CAST(neigh);
1234 }
1235 }
1236
1237 rt->dst.flags |= DST_HOST;
1238 rt->dst.output = ip6_output;
1239 rt->n = neigh;
1240 atomic_set(&rt->dst.__refcnt, 1);
1241 rt->rt6i_dst.addr = fl6->daddr;
1242 rt->rt6i_dst.plen = 128;
1243 rt->rt6i_idev = idev;
1244 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1245
1246 spin_lock_bh(&icmp6_dst_lock);
1247 rt->dst.next = icmp6_dst_gc_list;
1248 icmp6_dst_gc_list = &rt->dst;
1249 spin_unlock_bh(&icmp6_dst_lock);
1250
1251 fib6_force_start_gc(net);
1252
1253 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1254
1255out:
1256 return dst;
1257}
1258
1259int icmp6_dst_gc(void)
1260{
1261 struct dst_entry *dst, **pprev;
1262 int more = 0;
1263
1264 spin_lock_bh(&icmp6_dst_lock);
1265 pprev = &icmp6_dst_gc_list;
1266
1267 while ((dst = *pprev) != NULL) {
1268 if (!atomic_read(&dst->__refcnt)) {
1269 *pprev = dst->next;
1270 dst_free(dst);
1271 } else {
1272 pprev = &dst->next;
1273 ++more;
1274 }
1275 }
1276
1277 spin_unlock_bh(&icmp6_dst_lock);
1278
1279 return more;
1280}
1281
1282static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1283 void *arg)
1284{
1285 struct dst_entry *dst, **pprev;
1286
1287 spin_lock_bh(&icmp6_dst_lock);
1288 pprev = &icmp6_dst_gc_list;
1289 while ((dst = *pprev) != NULL) {
1290 struct rt6_info *rt = (struct rt6_info *) dst;
1291 if (func(rt, arg)) {
1292 *pprev = dst->next;
1293 dst_free(dst);
1294 } else {
1295 pprev = &dst->next;
1296 }
1297 }
1298 spin_unlock_bh(&icmp6_dst_lock);
1299}
1300
1301static int ip6_dst_gc(struct dst_ops *ops)
1302{
1303 unsigned long now = jiffies;
1304 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1305 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1306 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1307 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1308 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1309 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1310 int entries;
1311
1312 entries = dst_entries_get_fast(ops);
1313 if (time_after(rt_last_gc + rt_min_interval, now) &&
1314 entries <= rt_max_size)
1315 goto out;
1316
1317 net->ipv6.ip6_rt_gc_expire++;
1318 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1319 net->ipv6.ip6_rt_last_gc = now;
1320 entries = dst_entries_get_slow(ops);
1321 if (entries < ops->gc_thresh)
1322 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1323out:
1324 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1325 return entries > rt_max_size;
1326}
1327
1328
1329
1330
1331
1332
1333
1334int ip6_dst_hoplimit(struct dst_entry *dst)
1335{
1336 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1337 if (hoplimit == 0) {
1338 struct net_device *dev = dst->dev;
1339 struct inet6_dev *idev;
1340
1341 rcu_read_lock();
1342 idev = __in6_dev_get(dev);
1343 if (idev)
1344 hoplimit = idev->cnf.hop_limit;
1345 else
1346 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1347 rcu_read_unlock();
1348 }
1349 return hoplimit;
1350}
1351EXPORT_SYMBOL(ip6_dst_hoplimit);
1352
1353
1354
1355
1356
1357int ip6_route_add(struct fib6_config *cfg)
1358{
1359 int err;
1360 struct net *net = cfg->fc_nlinfo.nl_net;
1361 struct rt6_info *rt = NULL;
1362 struct net_device *dev = NULL;
1363 struct inet6_dev *idev = NULL;
1364 struct fib6_table *table;
1365 int addr_type;
1366
1367 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1368 return -EINVAL;
1369#ifndef CONFIG_IPV6_SUBTREES
1370 if (cfg->fc_src_len)
1371 return -EINVAL;
1372#endif
1373 if (cfg->fc_ifindex) {
1374 err = -ENODEV;
1375 dev = dev_get_by_index(net, cfg->fc_ifindex);
1376 if (!dev)
1377 goto out;
1378 idev = in6_dev_get(dev);
1379 if (!idev)
1380 goto out;
1381 }
1382
1383 if (cfg->fc_metric == 0)
1384 cfg->fc_metric = IP6_RT_PRIO_USER;
1385
1386 err = -ENOBUFS;
1387 if (cfg->fc_nlinfo.nlh &&
1388 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1389 table = fib6_get_table(net, cfg->fc_table);
1390 if (!table) {
1391 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1392 table = fib6_new_table(net, cfg->fc_table);
1393 }
1394 } else {
1395 table = fib6_new_table(net, cfg->fc_table);
1396 }
1397
1398 if (!table)
1399 goto out;
1400
1401 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1402
1403 if (!rt) {
1404 err = -ENOMEM;
1405 goto out;
1406 }
1407
1408 if (cfg->fc_flags & RTF_EXPIRES)
1409 rt6_set_expires(rt, jiffies +
1410 clock_t_to_jiffies(cfg->fc_expires));
1411 else
1412 rt6_clean_expires(rt);
1413
1414 if (cfg->fc_protocol == RTPROT_UNSPEC)
1415 cfg->fc_protocol = RTPROT_BOOT;
1416 rt->rt6i_protocol = cfg->fc_protocol;
1417
1418 addr_type = ipv6_addr_type(&cfg->fc_dst);
1419
1420 if (addr_type & IPV6_ADDR_MULTICAST)
1421 rt->dst.input = ip6_mc_input;
1422 else if (cfg->fc_flags & RTF_LOCAL)
1423 rt->dst.input = ip6_input;
1424 else
1425 rt->dst.input = ip6_forward;
1426
1427 rt->dst.output = ip6_output;
1428
1429 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1430 rt->rt6i_dst.plen = cfg->fc_dst_len;
1431 if (rt->rt6i_dst.plen == 128)
1432 rt->dst.flags |= DST_HOST;
1433
1434 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1435 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1436 if (!metrics) {
1437 err = -ENOMEM;
1438 goto out;
1439 }
1440 dst_init_metrics(&rt->dst, metrics, 0);
1441 }
1442#ifdef CONFIG_IPV6_SUBTREES
1443 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1444 rt->rt6i_src.plen = cfg->fc_src_len;
1445#endif
1446
1447 rt->rt6i_metric = cfg->fc_metric;
1448
1449
1450
1451
1452 if ((cfg->fc_flags & RTF_REJECT) ||
1453 (dev && (dev->flags & IFF_LOOPBACK) &&
1454 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1455 !(cfg->fc_flags & RTF_LOCAL))) {
1456
1457 if (dev != net->loopback_dev) {
1458 if (dev) {
1459 dev_put(dev);
1460 in6_dev_put(idev);
1461 }
1462 dev = net->loopback_dev;
1463 dev_hold(dev);
1464 idev = in6_dev_get(dev);
1465 if (!idev) {
1466 err = -ENODEV;
1467 goto out;
1468 }
1469 }
1470 rt->dst.output = ip6_pkt_discard_out;
1471 rt->dst.input = ip6_pkt_discard;
1472 rt->dst.error = -ENETUNREACH;
1473 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1474 goto install_route;
1475 }
1476
1477 if (cfg->fc_flags & RTF_GATEWAY) {
1478 const struct in6_addr *gw_addr;
1479 int gwa_type;
1480
1481 gw_addr = &cfg->fc_gateway;
1482 rt->rt6i_gateway = *gw_addr;
1483 gwa_type = ipv6_addr_type(gw_addr);
1484
1485 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1486 struct rt6_info *grt;
1487
1488
1489
1490
1491
1492
1493
1494
1495 err = -EINVAL;
1496 if (!(gwa_type & IPV6_ADDR_UNICAST))
1497 goto out;
1498
1499 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1500
1501 err = -EHOSTUNREACH;
1502 if (!grt)
1503 goto out;
1504 if (dev) {
1505 if (dev != grt->dst.dev) {
1506 dst_release(&grt->dst);
1507 goto out;
1508 }
1509 } else {
1510 dev = grt->dst.dev;
1511 idev = grt->rt6i_idev;
1512 dev_hold(dev);
1513 in6_dev_hold(grt->rt6i_idev);
1514 }
1515 if (!(grt->rt6i_flags & RTF_GATEWAY))
1516 err = 0;
1517 dst_release(&grt->dst);
1518
1519 if (err)
1520 goto out;
1521 }
1522 err = -EINVAL;
1523 if (!dev || (dev->flags & IFF_LOOPBACK))
1524 goto out;
1525 }
1526
1527 err = -ENODEV;
1528 if (!dev)
1529 goto out;
1530
1531 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1532 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1533 err = -EINVAL;
1534 goto out;
1535 }
1536 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1537 rt->rt6i_prefsrc.plen = 128;
1538 } else
1539 rt->rt6i_prefsrc.plen = 0;
1540
1541 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1542 err = rt6_bind_neighbour(rt, dev);
1543 if (err)
1544 goto out;
1545 }
1546
1547 rt->rt6i_flags = cfg->fc_flags;
1548
1549install_route:
1550 if (cfg->fc_mx) {
1551 struct nlattr *nla;
1552 int remaining;
1553
1554 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1555 int type = nla_type(nla);
1556
1557 if (type) {
1558 if (type > RTAX_MAX) {
1559 err = -EINVAL;
1560 goto out;
1561 }
1562
1563 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1564 }
1565 }
1566 }
1567
1568 rt->dst.dev = dev;
1569 rt->rt6i_idev = idev;
1570 rt->rt6i_table = table;
1571
1572 cfg->fc_nlinfo.nl_net = dev_net(dev);
1573
1574 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1575
1576out:
1577 if (dev)
1578 dev_put(dev);
1579 if (idev)
1580 in6_dev_put(idev);
1581 if (rt)
1582 dst_free(&rt->dst);
1583 return err;
1584}
1585
1586static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1587{
1588 int err;
1589 struct fib6_table *table;
1590 struct net *net = dev_net(rt->dst.dev);
1591
1592 if (rt == net->ipv6.ip6_null_entry) {
1593 err = -ENOENT;
1594 goto out;
1595 }
1596
1597 table = rt->rt6i_table;
1598 write_lock_bh(&table->tb6_lock);
1599 err = fib6_del(rt, info);
1600 write_unlock_bh(&table->tb6_lock);
1601
1602out:
1603 dst_release(&rt->dst);
1604 return err;
1605}
1606
1607int ip6_del_rt(struct rt6_info *rt)
1608{
1609 struct nl_info info = {
1610 .nl_net = dev_net(rt->dst.dev),
1611 };
1612 return __ip6_del_rt(rt, &info);
1613}
1614
1615static int ip6_route_del(struct fib6_config *cfg)
1616{
1617 struct fib6_table *table;
1618 struct fib6_node *fn;
1619 struct rt6_info *rt;
1620 int err = -ESRCH;
1621
1622 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1623 if (!table)
1624 return err;
1625
1626 read_lock_bh(&table->tb6_lock);
1627
1628 fn = fib6_locate(&table->tb6_root,
1629 &cfg->fc_dst, cfg->fc_dst_len,
1630 &cfg->fc_src, cfg->fc_src_len);
1631
1632 if (fn) {
1633 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1634 if (cfg->fc_ifindex &&
1635 (!rt->dst.dev ||
1636 rt->dst.dev->ifindex != cfg->fc_ifindex))
1637 continue;
1638 if (cfg->fc_flags & RTF_GATEWAY &&
1639 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1640 continue;
1641 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1642 continue;
1643 dst_hold(&rt->dst);
1644 read_unlock_bh(&table->tb6_lock);
1645
1646 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1647 }
1648 }
1649 read_unlock_bh(&table->tb6_lock);
1650
1651 return err;
1652}
1653
/*
 * Handle an ICMPv6 redirect carried in @skb for the route @dst.
 *
 * The message is validated (length, non-multicast destination, target
 * either equal to the destination or a link-local unicast address, ND
 * options), ignored when the receiving interface is forwarding or has
 * accept_redirects disabled, and ignored when @dst is the namespace's
 * ip6_null_entry.  On success a host route (RTF_CACHE | RTF_DYNAMIC) to the
 * redirected destination via the target neighbour is inserted, netevent
 * listeners are told via NETEVENT_REDIRECT, and, if the old route was
 * itself a cached route, it is deleted.
 *
 * @sk is not used in this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	const struct in6_addr *target;
	struct ndisc_options ndopts;
	const struct in6_addr *dest;
	struct neighbour *old_neigh;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct icmp6hdr *icmph;
	int optlen, on_link;
	u8 *lladdr;

	/*
	 * optlen = bytes left after the ICMPv6 header plus the two addresses
	 * (target and destination) that follow it.
	 */
	optlen = skb->tail - skb->transport_header;
	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	/* target address directly follows the header, destination follows it */
	icmph = icmp6_hdr(skb);
	target = (const struct in6_addr *) (icmph + 1);
	dest = target + 1;

	if (ipv6_addr_is_multicast(dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == destination means the destination itself is on-link */
	on_link = 0;
	if (ipv6_addr_equal(dest, target)) {
		on_link = 1;
	} else if (ipv6_addr_type(target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* redirects are ignored when forwarding or when not accepted */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/*
	 * Parse the ND options that follow the destination address; the
	 * only one used below is the target link-layer address.
	 */
	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/*
	 * The message passed every check above, so confirm the route it
	 * arrived for before acting on it.
	 */
	dst_confirm(&rt->dst);

	/* NOTE(review): last argument presumably asks for creation - confirm */
	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
	if (!neigh)
		return;

	/* Nothing to do when the route already uses the target neighbour. */
	old_neigh = rt->n;
	if (neigh == old_neigh)
		goto out;

	/*
	 * Record the (possibly NULL) link-layer address on the target
	 * neighbour.  The router flags are only passed when the target is a
	 * gateway rather than the on-link destination itself.
	 */
	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/* build a host route to dest modelled on the current route */
	nrt = ip6_rt_copy(rt, dest);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
	nrt->n = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.old_neigh = old_neigh;
	netevent.new = &nrt->dst;
	netevent.new_neigh = neigh;
	netevent.daddr = dest;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/*
	 * A cached old route is superseded by the new one: take an extra
	 * reference for ip6_del_rt() and remove it.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	/* drop the reference obtained from __neigh_lookup() */
	neigh_release(neigh);
}
1782
1783
1784
1785
1786
1787static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1788 const struct in6_addr *dest)
1789{
1790 struct net *net = dev_net(ort->dst.dev);
1791 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1792 ort->rt6i_table);
1793
1794 if (rt) {
1795 rt->dst.input = ort->dst.input;
1796 rt->dst.output = ort->dst.output;
1797 rt->dst.flags |= DST_HOST;
1798
1799 rt->rt6i_dst.addr = *dest;
1800 rt->rt6i_dst.plen = 128;
1801 dst_copy_metrics(&rt->dst, &ort->dst);
1802 rt->dst.error = ort->dst.error;
1803 rt->rt6i_idev = ort->rt6i_idev;
1804 if (rt->rt6i_idev)
1805 in6_dev_hold(rt->rt6i_idev);
1806 rt->dst.lastuse = jiffies;
1807
1808 rt->rt6i_gateway = ort->rt6i_gateway;
1809 rt->rt6i_flags = ort->rt6i_flags;
1810 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1811 (RTF_DEFAULT | RTF_ADDRCONF))
1812 rt6_set_from(rt, ort);
1813 else
1814 rt6_clean_expires(rt);
1815 rt->rt6i_metric = 0;
1816
1817#ifdef CONFIG_IPV6_SUBTREES
1818 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1819#endif
1820 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1821 rt->rt6i_table = ort->rt6i_table;
1822 }
1823 return rt;
1824}
1825
1826#ifdef CONFIG_IPV6_ROUTE_INFO
1827static struct rt6_info *rt6_get_route_info(struct net *net,
1828 const struct in6_addr *prefix, int prefixlen,
1829 const struct in6_addr *gwaddr, int ifindex)
1830{
1831 struct fib6_node *fn;
1832 struct rt6_info *rt = NULL;
1833 struct fib6_table *table;
1834
1835 table = fib6_get_table(net, RT6_TABLE_INFO);
1836 if (!table)
1837 return NULL;
1838
1839 write_lock_bh(&table->tb6_lock);
1840 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1841 if (!fn)
1842 goto out;
1843
1844 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1845 if (rt->dst.dev->ifindex != ifindex)
1846 continue;
1847 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1848 continue;
1849 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1850 continue;
1851 dst_hold(&rt->dst);
1852 break;
1853 }
1854out:
1855 write_unlock_bh(&table->tb6_lock);
1856 return rt;
1857}
1858
1859static struct rt6_info *rt6_add_route_info(struct net *net,
1860 const struct in6_addr *prefix, int prefixlen,
1861 const struct in6_addr *gwaddr, int ifindex,
1862 unsigned int pref)
1863{
1864 struct fib6_config cfg = {
1865 .fc_table = RT6_TABLE_INFO,
1866 .fc_metric = IP6_RT_PRIO_USER,
1867 .fc_ifindex = ifindex,
1868 .fc_dst_len = prefixlen,
1869 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1870 RTF_UP | RTF_PREF(pref),
1871 .fc_nlinfo.pid = 0,
1872 .fc_nlinfo.nlh = NULL,
1873 .fc_nlinfo.nl_net = net,
1874 };
1875
1876 cfg.fc_dst = *prefix;
1877 cfg.fc_gateway = *gwaddr;
1878
1879
1880 if (!prefixlen)
1881 cfg.fc_flags |= RTF_DEFAULT;
1882
1883 ip6_route_add(&cfg);
1884
1885 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1886}
1887#endif
1888
1889struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1890{
1891 struct rt6_info *rt;
1892 struct fib6_table *table;
1893
1894 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1895 if (!table)
1896 return NULL;
1897
1898 write_lock_bh(&table->tb6_lock);
1899 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1900 if (dev == rt->dst.dev &&
1901 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1902 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1903 break;
1904 }
1905 if (rt)
1906 dst_hold(&rt->dst);
1907 write_unlock_bh(&table->tb6_lock);
1908 return rt;
1909}
1910
1911struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1912 struct net_device *dev,
1913 unsigned int pref)
1914{
1915 struct fib6_config cfg = {
1916 .fc_table = RT6_TABLE_DFLT,
1917 .fc_metric = IP6_RT_PRIO_USER,
1918 .fc_ifindex = dev->ifindex,
1919 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1920 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1921 .fc_nlinfo.pid = 0,
1922 .fc_nlinfo.nlh = NULL,
1923 .fc_nlinfo.nl_net = dev_net(dev),
1924 };
1925
1926 cfg.fc_gateway = *gwaddr;
1927
1928 ip6_route_add(&cfg);
1929
1930 return rt6_get_dflt_router(gwaddr, dev);
1931}
1932
1933void rt6_purge_dflt_routers(struct net *net)
1934{
1935 struct rt6_info *rt;
1936 struct fib6_table *table;
1937
1938
1939 table = fib6_get_table(net, RT6_TABLE_DFLT);
1940 if (!table)
1941 return;
1942
1943restart:
1944 read_lock_bh(&table->tb6_lock);
1945 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1946 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1947 dst_hold(&rt->dst);
1948 read_unlock_bh(&table->tb6_lock);
1949 ip6_del_rt(rt);
1950 goto restart;
1951 }
1952 }
1953 read_unlock_bh(&table->tb6_lock);
1954}
1955
1956static void rtmsg_to_fib6_config(struct net *net,
1957 struct in6_rtmsg *rtmsg,
1958 struct fib6_config *cfg)
1959{
1960 memset(cfg, 0, sizeof(*cfg));
1961
1962 cfg->fc_table = RT6_TABLE_MAIN;
1963 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1964 cfg->fc_metric = rtmsg->rtmsg_metric;
1965 cfg->fc_expires = rtmsg->rtmsg_info;
1966 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1967 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1968 cfg->fc_flags = rtmsg->rtmsg_flags;
1969
1970 cfg->fc_nlinfo.nl_net = net;
1971
1972 cfg->fc_dst = rtmsg->rtmsg_dst;
1973 cfg->fc_src = rtmsg->rtmsg_src;
1974 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1975}
1976
1977int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1978{
1979 struct fib6_config cfg;
1980 struct in6_rtmsg rtmsg;
1981 int err;
1982
1983 switch(cmd) {
1984 case SIOCADDRT:
1985 case SIOCDELRT:
1986 if (!capable(CAP_NET_ADMIN))
1987 return -EPERM;
1988 err = copy_from_user(&rtmsg, arg,
1989 sizeof(struct in6_rtmsg));
1990 if (err)
1991 return -EFAULT;
1992
1993 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1994
1995 rtnl_lock();
1996 switch (cmd) {
1997 case SIOCADDRT:
1998 err = ip6_route_add(&cfg);
1999 break;
2000 case SIOCDELRT:
2001 err = ip6_route_del(&cfg);
2002 break;
2003 default:
2004 err = -EINVAL;
2005 }
2006 rtnl_unlock();
2007
2008 return err;
2009 }
2010
2011 return -EINVAL;
2012}
2013
2014
2015
2016
2017
2018static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2019{
2020 int type;
2021 struct dst_entry *dst = skb_dst(skb);
2022 switch (ipstats_mib_noroutes) {
2023 case IPSTATS_MIB_INNOROUTES:
2024 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2025 if (type == IPV6_ADDR_ANY) {
2026 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2027 IPSTATS_MIB_INADDRERRORS);
2028 break;
2029 }
2030
2031 case IPSTATS_MIB_OUTNOROUTES:
2032 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2033 ipstats_mib_noroutes);
2034 break;
2035 }
2036 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2037 kfree_skb(skb);
2038 return 0;
2039}
2040
2041static int ip6_pkt_discard(struct sk_buff *skb)
2042{
2043 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2044}
2045
2046static int ip6_pkt_discard_out(struct sk_buff *skb)
2047{
2048 skb->dev = skb_dst(skb)->dev;
2049 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2050}
2051
2052#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2053
2054static int ip6_pkt_prohibit(struct sk_buff *skb)
2055{
2056 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2057}
2058
2059static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2060{
2061 skb->dev = skb_dst(skb)->dev;
2062 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2063}
2064
2065#endif
2066
2067
2068
2069
2070
2071struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2072 const struct in6_addr *addr,
2073 bool anycast)
2074{
2075 struct net *net = dev_net(idev->dev);
2076 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2077 int err;
2078
2079 if (!rt) {
2080 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2081 return ERR_PTR(-ENOMEM);
2082 }
2083
2084 in6_dev_hold(idev);
2085
2086 rt->dst.flags |= DST_HOST;
2087 rt->dst.input = ip6_input;
2088 rt->dst.output = ip6_output;
2089 rt->rt6i_idev = idev;
2090
2091 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2092 if (anycast)
2093 rt->rt6i_flags |= RTF_ANYCAST;
2094 else
2095 rt->rt6i_flags |= RTF_LOCAL;
2096 err = rt6_bind_neighbour(rt, rt->dst.dev);
2097 if (err) {
2098 dst_free(&rt->dst);
2099 return ERR_PTR(err);
2100 }
2101
2102 rt->rt6i_dst.addr = *addr;
2103 rt->rt6i_dst.plen = 128;
2104 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2105
2106 atomic_set(&rt->dst.__refcnt, 1);
2107
2108 return rt;
2109}
2110
2111int ip6_route_get_saddr(struct net *net,
2112 struct rt6_info *rt,
2113 const struct in6_addr *daddr,
2114 unsigned int prefs,
2115 struct in6_addr *saddr)
2116{
2117 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2118 int err = 0;
2119 if (rt->rt6i_prefsrc.plen)
2120 *saddr = rt->rt6i_prefsrc.addr;
2121 else
2122 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2123 daddr, prefs, saddr);
2124 return err;
2125}
2126
2127
/* Argument bundle passed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;	/* preferred-source address being removed */
};
2133
2134static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2135{
2136 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2137 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2138 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2139
2140 if (((void *)rt->dst.dev == dev || !dev) &&
2141 rt != net->ipv6.ip6_null_entry &&
2142 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2143
2144 rt->rt6i_prefsrc.plen = 0;
2145 }
2146 return 0;
2147}
2148
2149void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2150{
2151 struct net *net = dev_net(ifp->idev->dev);
2152 struct arg_dev_net_ip adni = {
2153 .dev = ifp->idev->dev,
2154 .net = net,
2155 .addr = &ifp->addr,
2156 };
2157 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2158}
2159
/* Argument bundle passed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is never selected */
};
2164
2165static int fib6_ifdown(struct rt6_info *rt, void *arg)
2166{
2167 const struct arg_dev_net *adn = arg;
2168 const struct net_device *dev = adn->dev;
2169
2170 if ((rt->dst.dev == dev || !dev) &&
2171 rt != adn->net->ipv6.ip6_null_entry)
2172 return -1;
2173
2174 return 0;
2175}
2176
2177void rt6_ifdown(struct net *net, struct net_device *dev)
2178{
2179 struct arg_dev_net adn = {
2180 .dev = dev,
2181 .net = net,
2182 };
2183
2184 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2185 icmp6_clean_all(fib6_ifdown, &adn);
2186}
2187
/* Argument bundle passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* value written to RTAX_MTU of affected routes */
};
2192
2193static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2194{
2195 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2196 struct inet6_dev *idev;
2197
2198
2199
2200
2201
2202
2203
2204 idev = __in6_dev_get(arg->dev);
2205 if (!idev)
2206 return 0;
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222 if (rt->dst.dev == arg->dev &&
2223 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2224 (dst_mtu(&rt->dst) >= arg->mtu ||
2225 (dst_mtu(&rt->dst) < arg->mtu &&
2226 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2227 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2228 }
2229 return 0;
2230}
2231
2232void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2233{
2234 struct rt6_mtu_change_arg arg = {
2235 .dev = dev,
2236 .mtu = mtu,
2237 };
2238
2239 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2240}
2241
2242static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2243 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2244 [RTA_OIF] = { .type = NLA_U32 },
2245 [RTA_IIF] = { .type = NLA_U32 },
2246 [RTA_PRIORITY] = { .type = NLA_U32 },
2247 [RTA_METRICS] = { .type = NLA_NESTED },
2248};
2249
2250static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2251 struct fib6_config *cfg)
2252{
2253 struct rtmsg *rtm;
2254 struct nlattr *tb[RTA_MAX+1];
2255 int err;
2256
2257 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2258 if (err < 0)
2259 goto errout;
2260
2261 err = -EINVAL;
2262 rtm = nlmsg_data(nlh);
2263 memset(cfg, 0, sizeof(*cfg));
2264
2265 cfg->fc_table = rtm->rtm_table;
2266 cfg->fc_dst_len = rtm->rtm_dst_len;
2267 cfg->fc_src_len = rtm->rtm_src_len;
2268 cfg->fc_flags = RTF_UP;
2269 cfg->fc_protocol = rtm->rtm_protocol;
2270
2271 if (rtm->rtm_type == RTN_UNREACHABLE)
2272 cfg->fc_flags |= RTF_REJECT;
2273
2274 if (rtm->rtm_type == RTN_LOCAL)
2275 cfg->fc_flags |= RTF_LOCAL;
2276
2277 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2278 cfg->fc_nlinfo.nlh = nlh;
2279 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2280
2281 if (tb[RTA_GATEWAY]) {
2282 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2283 cfg->fc_flags |= RTF_GATEWAY;
2284 }
2285
2286 if (tb[RTA_DST]) {
2287 int plen = (rtm->rtm_dst_len + 7) >> 3;
2288
2289 if (nla_len(tb[RTA_DST]) < plen)
2290 goto errout;
2291
2292 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2293 }
2294
2295 if (tb[RTA_SRC]) {
2296 int plen = (rtm->rtm_src_len + 7) >> 3;
2297
2298 if (nla_len(tb[RTA_SRC]) < plen)
2299 goto errout;
2300
2301 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2302 }
2303
2304 if (tb[RTA_PREFSRC])
2305 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2306
2307 if (tb[RTA_OIF])
2308 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2309
2310 if (tb[RTA_PRIORITY])
2311 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2312
2313 if (tb[RTA_METRICS]) {
2314 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2315 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2316 }
2317
2318 if (tb[RTA_TABLE])
2319 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2320
2321 err = 0;
2322errout:
2323 return err;
2324}
2325
2326static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2327{
2328 struct fib6_config cfg;
2329 int err;
2330
2331 err = rtm_to_fib6_config(skb, nlh, &cfg);
2332 if (err < 0)
2333 return err;
2334
2335 return ip6_route_del(&cfg);
2336}
2337
2338static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2339{
2340 struct fib6_config cfg;
2341 int err;
2342
2343 err = rtm_to_fib6_config(skb, nlh, &cfg);
2344 if (err < 0)
2345 return err;
2346
2347 return ip6_route_add(&cfg);
2348}
2349
2350static inline size_t rt6_nlmsg_size(void)
2351{
2352 return NLMSG_ALIGN(sizeof(struct rtmsg))
2353 + nla_total_size(16)
2354 + nla_total_size(16)
2355 + nla_total_size(16)
2356 + nla_total_size(16)
2357 + nla_total_size(4)
2358 + nla_total_size(4)
2359 + nla_total_size(4)
2360 + nla_total_size(4)
2361 + RTAX_MAX * nla_total_size(4)
2362 + nla_total_size(sizeof(struct rta_cacheinfo));
2363}
2364
/*
 * Append one route message describing @rt to @skb.
 *
 * @dst/@src: when non-NULL they are reported as RTA_DST/RTA_SRC with a
 *            prefix length of 128 instead of the route's own keys
 *            (RTA_SRC only with CONFIG_IPV6_SUBTREES).
 * @iif:      non-zero input interface; reported as RTA_IIF, or handed to
 *            ip6mr_get_route() for multicast destinations when
 *            CONFIG_IPV6_MROUTE is enabled.
 * @type, @pid, @seq, @flags: passed to nlmsg_put() for the message header.
 * @prefix:   when non-zero, routes without RTF_PREFIX_RT are skipped.
 * @nowait:   passed to ip6mr_get_route(); also selects how its errors are
 *            treated.
 *
 * Returns 1 when the route was skipped by the @prefix filter, 0 when
 * ip6mr_get_route() returned 0 with @nowait clear, -EMSGSIZE when the
 * message does not fit (the partial message is cancelled), otherwise the
 * value of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;

	if (prefix) {
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* not a prefix route: nothing emitted, not an error */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	/* table id goes both into the header and into RTA_TABLE */
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* route type: reject > local > loopback device (local) > unicast */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* the stored protocol is overridden for dynamic and addrconf routes */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* explicit @dst wins over the route's own destination key */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					/* 0 is returned as-is, errors cancel the message */
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* with nowait only -EMSGSIZE is fatal */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* output lookup: report the source address that would be used */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/* configured preferred source, if any */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* the gateway is taken from the key of the route's neighbour entry */
	rcu_read_lock();
	n = rt->n;
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* remaining lifetime in jiffies, 0 for routes that do not expire */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* roll back everything added since nlmsg_put() */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2501
2502int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2503{
2504 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2505 int prefix;
2506
2507 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2508 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2509 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2510 } else
2511 prefix = 0;
2512
2513 return rt6_fill_node(arg->net,
2514 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2515 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2516 prefix, 0, NLM_F_MULTI);
2517}
2518
2519static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2520{
2521 struct net *net = sock_net(in_skb->sk);
2522 struct nlattr *tb[RTA_MAX+1];
2523 struct rt6_info *rt;
2524 struct sk_buff *skb;
2525 struct rtmsg *rtm;
2526 struct flowi6 fl6;
2527 int err, iif = 0, oif = 0;
2528
2529 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2530 if (err < 0)
2531 goto errout;
2532
2533 err = -EINVAL;
2534 memset(&fl6, 0, sizeof(fl6));
2535
2536 if (tb[RTA_SRC]) {
2537 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2538 goto errout;
2539
2540 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2541 }
2542
2543 if (tb[RTA_DST]) {
2544 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2545 goto errout;
2546
2547 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2548 }
2549
2550 if (tb[RTA_IIF])
2551 iif = nla_get_u32(tb[RTA_IIF]);
2552
2553 if (tb[RTA_OIF])
2554 oif = nla_get_u32(tb[RTA_OIF]);
2555
2556 if (iif) {
2557 struct net_device *dev;
2558 int flags = 0;
2559
2560 dev = __dev_get_by_index(net, iif);
2561 if (!dev) {
2562 err = -ENODEV;
2563 goto errout;
2564 }
2565
2566 fl6.flowi6_iif = iif;
2567
2568 if (!ipv6_addr_any(&fl6.saddr))
2569 flags |= RT6_LOOKUP_F_HAS_SADDR;
2570
2571 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2572 flags);
2573 } else {
2574 fl6.flowi6_oif = oif;
2575
2576 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2577 }
2578
2579 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2580 if (!skb) {
2581 dst_release(&rt->dst);
2582 err = -ENOBUFS;
2583 goto errout;
2584 }
2585
2586
2587
2588
2589 skb_reset_mac_header(skb);
2590 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2591
2592 skb_dst_set(skb, &rt->dst);
2593
2594 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2595 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2596 nlh->nlmsg_seq, 0, 0, 0);
2597 if (err < 0) {
2598 kfree_skb(skb);
2599 goto errout;
2600 }
2601
2602 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2603errout:
2604 return err;
2605}
2606
2607void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2608{
2609 struct sk_buff *skb;
2610 struct net *net = info->nl_net;
2611 u32 seq;
2612 int err;
2613
2614 err = -ENOBUFS;
2615 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2616
2617 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2618 if (!skb)
2619 goto errout;
2620
2621 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2622 event, info->pid, seq, 0, 0, 0);
2623 if (err < 0) {
2624
2625 WARN_ON(err == -EMSGSIZE);
2626 kfree_skb(skb);
2627 goto errout;
2628 }
2629 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2630 info->nlh, gfp_any());
2631 return;
2632errout:
2633 if (err < 0)
2634 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2635}
2636
2637static int ip6_route_dev_notify(struct notifier_block *this,
2638 unsigned long event, void *data)
2639{
2640 struct net_device *dev = (struct net_device *)data;
2641 struct net *net = dev_net(dev);
2642
2643 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2644 net->ipv6.ip6_null_entry->dst.dev = dev;
2645 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2646#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2647 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2648 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2649 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2650 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2651#endif
2652 }
2653
2654 return NOTIFY_OK;
2655}
2656
2657
2658
2659
2660
2661#ifdef CONFIG_PROC_FS
2662
/*
 * Cursor state for buffer-based /proc output.
 * NOTE(review): nothing in the rest of this file appears to reference this
 * structure - confirm whether it can be dropped.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2671
2672static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2673{
2674 struct seq_file *m = p_arg;
2675 struct neighbour *n;
2676
2677 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2678
2679#ifdef CONFIG_IPV6_SUBTREES
2680 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2681#else
2682 seq_puts(m, "00000000000000000000000000000000 00 ");
2683#endif
2684 rcu_read_lock();
2685 n = rt->n;
2686 if (n) {
2687 seq_printf(m, "%pi6", n->primary_key);
2688 } else {
2689 seq_puts(m, "00000000000000000000000000000000");
2690 }
2691 rcu_read_unlock();
2692 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2693 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2694 rt->dst.__use, rt->rt6i_flags,
2695 rt->dst.dev ? rt->dst.dev->name : "");
2696 return 0;
2697}
2698
2699static int ipv6_route_show(struct seq_file *m, void *v)
2700{
2701 struct net *net = (struct net *)m->private;
2702 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2703 return 0;
2704}
2705
2706static int ipv6_route_open(struct inode *inode, struct file *file)
2707{
2708 return single_open_net(inode, file, ipv6_route_show);
2709}
2710
2711static const struct file_operations ipv6_route_proc_fops = {
2712 .owner = THIS_MODULE,
2713 .open = ipv6_route_open,
2714 .read = seq_read,
2715 .llseek = seq_lseek,
2716 .release = single_release_net,
2717};
2718
2719static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2720{
2721 struct net *net = (struct net *)seq->private;
2722 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2723 net->ipv6.rt6_stats->fib_nodes,
2724 net->ipv6.rt6_stats->fib_route_nodes,
2725 net->ipv6.rt6_stats->fib_rt_alloc,
2726 net->ipv6.rt6_stats->fib_rt_entries,
2727 net->ipv6.rt6_stats->fib_rt_cache,
2728 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2729 net->ipv6.rt6_stats->fib_discarded_routes);
2730
2731 return 0;
2732}
2733
2734static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2735{
2736 return single_open_net(inode, file, rt6_stats_seq_show);
2737}
2738
2739static const struct file_operations rt6_stats_seq_fops = {
2740 .owner = THIS_MODULE,
2741 .open = rt6_stats_seq_open,
2742 .read = seq_read,
2743 .llseek = seq_lseek,
2744 .release = single_release_net,
2745};
2746#endif
2747
2748#ifdef CONFIG_SYSCTL
2749
2750static
2751int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2752 void __user *buffer, size_t *lenp, loff_t *ppos)
2753{
2754 struct net *net;
2755 int delay;
2756 if (!write)
2757 return -EINVAL;
2758
2759 net = (struct net *)ctl->extra1;
2760 delay = net->ipv6.sysctl.flush_delay;
2761 proc_dointvec(ctl, write, buffer, lenp, ppos);
2762 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2763 return 0;
2764}
2765
2766ctl_table ipv6_route_table_template[] = {
2767 {
2768 .procname = "flush",
2769 .data = &init_net.ipv6.sysctl.flush_delay,
2770 .maxlen = sizeof(int),
2771 .mode = 0200,
2772 .proc_handler = ipv6_sysctl_rtcache_flush
2773 },
2774 {
2775 .procname = "gc_thresh",
2776 .data = &ip6_dst_ops_template.gc_thresh,
2777 .maxlen = sizeof(int),
2778 .mode = 0644,
2779 .proc_handler = proc_dointvec,
2780 },
2781 {
2782 .procname = "max_size",
2783 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2784 .maxlen = sizeof(int),
2785 .mode = 0644,
2786 .proc_handler = proc_dointvec,
2787 },
2788 {
2789 .procname = "gc_min_interval",
2790 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2791 .maxlen = sizeof(int),
2792 .mode = 0644,
2793 .proc_handler = proc_dointvec_jiffies,
2794 },
2795 {
2796 .procname = "gc_timeout",
2797 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2798 .maxlen = sizeof(int),
2799 .mode = 0644,
2800 .proc_handler = proc_dointvec_jiffies,
2801 },
2802 {
2803 .procname = "gc_interval",
2804 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2805 .maxlen = sizeof(int),
2806 .mode = 0644,
2807 .proc_handler = proc_dointvec_jiffies,
2808 },
2809 {
2810 .procname = "gc_elasticity",
2811 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2812 .maxlen = sizeof(int),
2813 .mode = 0644,
2814 .proc_handler = proc_dointvec,
2815 },
2816 {
2817 .procname = "mtu_expires",
2818 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2819 .maxlen = sizeof(int),
2820 .mode = 0644,
2821 .proc_handler = proc_dointvec_jiffies,
2822 },
2823 {
2824 .procname = "min_adv_mss",
2825 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2826 .maxlen = sizeof(int),
2827 .mode = 0644,
2828 .proc_handler = proc_dointvec,
2829 },
2830 {
2831 .procname = "gc_min_interval_ms",
2832 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2833 .maxlen = sizeof(int),
2834 .mode = 0644,
2835 .proc_handler = proc_dointvec_ms_jiffies,
2836 },
2837 { }
2838};
2839
2840struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2841{
2842 struct ctl_table *table;
2843
2844 table = kmemdup(ipv6_route_table_template,
2845 sizeof(ipv6_route_table_template),
2846 GFP_KERNEL);
2847
2848 if (table) {
2849 table[0].data = &net->ipv6.sysctl.flush_delay;
2850 table[0].extra1 = net;
2851 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2852 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2853 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2854 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2855 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2856 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2857 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2858 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2859 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2860 }
2861
2862 return table;
2863}
2864#endif
2865
2866static int __net_init ip6_route_net_init(struct net *net)
2867{
2868 int ret = -ENOMEM;
2869
2870 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2871 sizeof(net->ipv6.ip6_dst_ops));
2872
2873 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2874 goto out_ip6_dst_ops;
2875
2876 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2877 sizeof(*net->ipv6.ip6_null_entry),
2878 GFP_KERNEL);
2879 if (!net->ipv6.ip6_null_entry)
2880 goto out_ip6_dst_entries;
2881 net->ipv6.ip6_null_entry->dst.path =
2882 (struct dst_entry *)net->ipv6.ip6_null_entry;
2883 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2884 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2885 ip6_template_metrics, true);
2886
2887#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2888 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2889 sizeof(*net->ipv6.ip6_prohibit_entry),
2890 GFP_KERNEL);
2891 if (!net->ipv6.ip6_prohibit_entry)
2892 goto out_ip6_null_entry;
2893 net->ipv6.ip6_prohibit_entry->dst.path =
2894 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2895 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2896 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2897 ip6_template_metrics, true);
2898
2899 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2900 sizeof(*net->ipv6.ip6_blk_hole_entry),
2901 GFP_KERNEL);
2902 if (!net->ipv6.ip6_blk_hole_entry)
2903 goto out_ip6_prohibit_entry;
2904 net->ipv6.ip6_blk_hole_entry->dst.path =
2905 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2906 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2907 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2908 ip6_template_metrics, true);
2909#endif
2910
2911 net->ipv6.sysctl.flush_delay = 0;
2912 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2913 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2914 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2915 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2916 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2917 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2918 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2919
2920 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2921
2922 ret = 0;
2923out:
2924 return ret;
2925
2926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2927out_ip6_prohibit_entry:
2928 kfree(net->ipv6.ip6_prohibit_entry);
2929out_ip6_null_entry:
2930 kfree(net->ipv6.ip6_null_entry);
2931#endif
2932out_ip6_dst_entries:
2933 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2934out_ip6_dst_ops:
2935 goto out;
2936}
2937
2938static void __net_exit ip6_route_net_exit(struct net *net)
2939{
2940 kfree(net->ipv6.ip6_null_entry);
2941#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2942 kfree(net->ipv6.ip6_prohibit_entry);
2943 kfree(net->ipv6.ip6_blk_hole_entry);
2944#endif
2945 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2946}
2947
2948static int __net_init ip6_route_net_init_late(struct net *net)
2949{
2950#ifdef CONFIG_PROC_FS
2951 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2952 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2953#endif
2954 return 0;
2955}
2956
2957static void __net_exit ip6_route_net_exit_late(struct net *net)
2958{
2959#ifdef CONFIG_PROC_FS
2960 proc_net_remove(net, "ipv6_route");
2961 proc_net_remove(net, "rt6_stats");
2962#endif
2963}
2964
2965static struct pernet_operations ip6_route_net_ops = {
2966 .init = ip6_route_net_init,
2967 .exit = ip6_route_net_exit,
2968};
2969
2970static int __net_init ipv6_inetpeer_init(struct net *net)
2971{
2972 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2973
2974 if (!bp)
2975 return -ENOMEM;
2976 inet_peer_base_init(bp);
2977 net->ipv6.peers = bp;
2978 return 0;
2979}
2980
2981static void __net_exit ipv6_inetpeer_exit(struct net *net)
2982{
2983 struct inet_peer_base *bp = net->ipv6.peers;
2984
2985 net->ipv6.peers = NULL;
2986 inetpeer_invalidate_tree(bp);
2987 kfree(bp);
2988}
2989
2990static struct pernet_operations ipv6_inetpeer_ops = {
2991 .init = ipv6_inetpeer_init,
2992 .exit = ipv6_inetpeer_exit,
2993};
2994
2995static struct pernet_operations ip6_route_net_late_ops = {
2996 .init = ip6_route_net_init_late,
2997 .exit = ip6_route_net_exit_late,
2998};
2999
3000static struct notifier_block ip6_route_dev_notifier = {
3001 .notifier_call = ip6_route_dev_notify,
3002 .priority = 0,
3003};
3004
3005int __init ip6_route_init(void)
3006{
3007 int ret;
3008
3009 ret = -ENOMEM;
3010 ip6_dst_ops_template.kmem_cachep =
3011 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3012 SLAB_HWCACHE_ALIGN, NULL);
3013 if (!ip6_dst_ops_template.kmem_cachep)
3014 goto out;
3015
3016 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3017 if (ret)
3018 goto out_kmem_cache;
3019
3020 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3021 if (ret)
3022 goto out_dst_entries;
3023
3024 ret = register_pernet_subsys(&ip6_route_net_ops);
3025 if (ret)
3026 goto out_register_inetpeer;
3027
3028 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3029
3030
3031
3032
3033 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3034 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3035 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3036 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3037 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3038 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3039 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3040 #endif
3041 ret = fib6_init();
3042 if (ret)
3043 goto out_register_subsys;
3044
3045 ret = xfrm6_init();
3046 if (ret)
3047 goto out_fib6_init;
3048
3049 ret = fib6_rules_init();
3050 if (ret)
3051 goto xfrm6_init;
3052
3053 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3054 if (ret)
3055 goto fib6_rules_init;
3056
3057 ret = -ENOBUFS;
3058 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3059 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3060 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3061 goto out_register_late_subsys;
3062
3063 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3064 if (ret)
3065 goto out_register_late_subsys;
3066
3067out:
3068 return ret;
3069
3070out_register_late_subsys:
3071 unregister_pernet_subsys(&ip6_route_net_late_ops);
3072fib6_rules_init:
3073 fib6_rules_cleanup();
3074xfrm6_init:
3075 xfrm6_fini();
3076out_fib6_init:
3077 fib6_gc_cleanup();
3078out_register_subsys:
3079 unregister_pernet_subsys(&ip6_route_net_ops);
3080out_register_inetpeer:
3081 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3082out_dst_entries:
3083 dst_entries_destroy(&ip6_dst_blackhole_ops);
3084out_kmem_cache:
3085 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3086 goto out;
3087}
3088
3089void ip6_route_cleanup(void)
3090{
3091 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3092 unregister_pernet_subsys(&ip6_route_net_late_ops);
3093 fib6_rules_cleanup();
3094 xfrm6_fini();
3095 fib6_gc_cleanup();
3096 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3097 unregister_pernet_subsys(&ip6_route_net_ops);
3098 dst_entries_destroy(&ip6_dst_blackhole_ops);
3099 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3100}
3101