/* linux/net/ipv6/route.c */
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60
  61#include <asm/uaccess.h>
  62
  63#ifdef CONFIG_SYSCTL
  64#include <linux/sysctl.h>
  65#endif
  66
  67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
  68                                    const struct in6_addr *dest);
  69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  70static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  71static unsigned int      ip6_mtu(const struct dst_entry *dst);
  72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  73static void             ip6_dst_destroy(struct dst_entry *);
  74static void             ip6_dst_ifdown(struct dst_entry *,
  75                                       struct net_device *dev, int how);
  76static int               ip6_dst_gc(struct dst_ops *ops);
  77
  78static int              ip6_pkt_discard(struct sk_buff *skb);
  79static int              ip6_pkt_discard_out(struct sk_buff *skb);
  80static void             ip6_link_failure(struct sk_buff *skb);
  81static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  82                                           struct sk_buff *skb, u32 mtu);
  83static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  84                                        struct sk_buff *skb);
  85
  86#ifdef CONFIG_IPV6_ROUTE_INFO
  87static struct rt6_info *rt6_add_route_info(struct net *net,
  88                                           const struct in6_addr *prefix, int prefixlen,
  89                                           const struct in6_addr *gwaddr, int ifindex,
  90                                           unsigned int pref);
  91static struct rt6_info *rt6_get_route_info(struct net *net,
  92                                           const struct in6_addr *prefix, int prefixlen,
  93                                           const struct in6_addr *gwaddr, int ifindex);
  94#endif
  95
  96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
  97{
  98        struct rt6_info *rt = (struct rt6_info *) dst;
  99        struct inet_peer *peer;
 100        u32 *p = NULL;
 101
 102        if (!(rt->dst.flags & DST_HOST))
 103                return NULL;
 104
 105        peer = rt6_get_peer_create(rt);
 106        if (peer) {
 107                u32 *old_p = __DST_METRICS_PTR(old);
 108                unsigned long prev, new;
 109
 110                p = peer->metrics;
 111                if (inet_metrics_new(peer))
 112                        memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 113
 114                new = (unsigned long) p;
 115                prev = cmpxchg(&dst->_metrics, old, new);
 116
 117                if (prev != old) {
 118                        p = __DST_METRICS_PTR(prev);
 119                        if (prev & DST_METRICS_READ_ONLY)
 120                                p = NULL;
 121                }
 122        }
 123        return p;
 124}
 125
 126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 127                                             struct sk_buff *skb,
 128                                             const void *daddr)
 129{
 130        struct in6_addr *p = &rt->rt6i_gateway;
 131
 132        if (!ipv6_addr_any(p))
 133                return (const void *) p;
 134        else if (skb)
 135                return &ipv6_hdr(skb)->daddr;
 136        return daddr;
 137}
 138
 139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 140                                          struct sk_buff *skb,
 141                                          const void *daddr)
 142{
 143        struct rt6_info *rt = (struct rt6_info *) dst;
 144        struct neighbour *n;
 145
 146        daddr = choose_neigh_daddr(rt, skb, daddr);
 147        n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
 148        if (n)
 149                return n;
 150        return neigh_create(&nd_tbl, daddr, dst->dev);
 151}
 152
 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
 154{
 155        struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
 156        if (!n) {
 157                n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
 158                if (IS_ERR(n))
 159                        return PTR_ERR(n);
 160        }
 161        rt->n = n;
 162
 163        return 0;
 164}
 165
 166static struct dst_ops ip6_dst_ops_template = {
 167        .family                 =       AF_INET6,
 168        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 169        .gc                     =       ip6_dst_gc,
 170        .gc_thresh              =       1024,
 171        .check                  =       ip6_dst_check,
 172        .default_advmss         =       ip6_default_advmss,
 173        .mtu                    =       ip6_mtu,
 174        .cow_metrics            =       ipv6_cow_metrics,
 175        .destroy                =       ip6_dst_destroy,
 176        .ifdown                 =       ip6_dst_ifdown,
 177        .negative_advice        =       ip6_negative_advice,
 178        .link_failure           =       ip6_link_failure,
 179        .update_pmtu            =       ip6_rt_update_pmtu,
 180        .redirect               =       rt6_do_redirect,
 181        .local_out              =       __ip6_local_out,
 182        .neigh_lookup           =       ip6_neigh_lookup,
 183};
 184
 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 186{
 187        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 188
 189        return mtu ? : dst->dev->mtu;
 190}
 191
 192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 193                                         struct sk_buff *skb, u32 mtu)
 194{
 195}
 196
 197static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 198                                      struct sk_buff *skb)
 199{
 200}
 201
 202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 203                                         unsigned long old)
 204{
 205        return NULL;
 206}
 207
 208static struct dst_ops ip6_dst_blackhole_ops = {
 209        .family                 =       AF_INET6,
 210        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 211        .destroy                =       ip6_dst_destroy,
 212        .check                  =       ip6_dst_check,
 213        .mtu                    =       ip6_blackhole_mtu,
 214        .default_advmss         =       ip6_default_advmss,
 215        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 216        .redirect               =       ip6_rt_blackhole_redirect,
 217        .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
 218        .neigh_lookup           =       ip6_neigh_lookup,
 219};
 220
 221static const u32 ip6_template_metrics[RTAX_MAX] = {
 222        [RTAX_HOPLIMIT - 1] = 0,
 223};
 224
 225static const struct rt6_info ip6_null_entry_template = {
 226        .dst = {
 227                .__refcnt       = ATOMIC_INIT(1),
 228                .__use          = 1,
 229                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 230                .error          = -ENETUNREACH,
 231                .input          = ip6_pkt_discard,
 232                .output         = ip6_pkt_discard_out,
 233        },
 234        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 235        .rt6i_protocol  = RTPROT_KERNEL,
 236        .rt6i_metric    = ~(u32) 0,
 237        .rt6i_ref       = ATOMIC_INIT(1),
 238};
 239
 240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 241
 242static int ip6_pkt_prohibit(struct sk_buff *skb);
 243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 244
 245static const struct rt6_info ip6_prohibit_entry_template = {
 246        .dst = {
 247                .__refcnt       = ATOMIC_INIT(1),
 248                .__use          = 1,
 249                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 250                .error          = -EACCES,
 251                .input          = ip6_pkt_prohibit,
 252                .output         = ip6_pkt_prohibit_out,
 253        },
 254        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 255        .rt6i_protocol  = RTPROT_KERNEL,
 256        .rt6i_metric    = ~(u32) 0,
 257        .rt6i_ref       = ATOMIC_INIT(1),
 258};
 259
 260static const struct rt6_info ip6_blk_hole_entry_template = {
 261        .dst = {
 262                .__refcnt       = ATOMIC_INIT(1),
 263                .__use          = 1,
 264                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 265                .error          = -EINVAL,
 266                .input          = dst_discard,
 267                .output         = dst_discard,
 268        },
 269        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 270        .rt6i_protocol  = RTPROT_KERNEL,
 271        .rt6i_metric    = ~(u32) 0,
 272        .rt6i_ref       = ATOMIC_INIT(1),
 273};
 274
 275#endif
 276
 277/* allocate dst with ip6_dst_ops */
 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 279                                             struct net_device *dev,
 280                                             int flags,
 281                                             struct fib6_table *table)
 282{
 283        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 284                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 285
 286        if (rt) {
 287                struct dst_entry *dst = &rt->dst;
 288
 289                memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 290                rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 291                rt->rt6i_genid = rt_genid(net);
 292        }
 293        return rt;
 294}
 295
 296static void ip6_dst_destroy(struct dst_entry *dst)
 297{
 298        struct rt6_info *rt = (struct rt6_info *)dst;
 299        struct inet6_dev *idev = rt->rt6i_idev;
 300
 301        if (rt->n)
 302                neigh_release(rt->n);
 303
 304        if (!(rt->dst.flags & DST_HOST))
 305                dst_destroy_metrics_generic(dst);
 306
 307        if (idev) {
 308                rt->rt6i_idev = NULL;
 309                in6_dev_put(idev);
 310        }
 311
 312        if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
 313                dst_release(dst->from);
 314
 315        if (rt6_has_peer(rt)) {
 316                struct inet_peer *peer = rt6_peer_ptr(rt);
 317                inet_putpeer(peer);
 318        }
 319}
 320
 321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 322
 323static u32 rt6_peer_genid(void)
 324{
 325        return atomic_read(&__rt6_peer_genid);
 326}
 327
 328void rt6_bind_peer(struct rt6_info *rt, int create)
 329{
 330        struct inet_peer_base *base;
 331        struct inet_peer *peer;
 332
 333        base = inetpeer_base_ptr(rt->_rt6i_peer);
 334        if (!base)
 335                return;
 336
 337        peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
 338        if (peer) {
 339                if (!rt6_set_peer(rt, peer))
 340                        inet_putpeer(peer);
 341                else
 342                        rt->rt6i_peer_genid = rt6_peer_genid();
 343        }
 344}
 345
 346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 347                           int how)
 348{
 349        struct rt6_info *rt = (struct rt6_info *)dst;
 350        struct inet6_dev *idev = rt->rt6i_idev;
 351        struct net_device *loopback_dev =
 352                dev_net(dev)->loopback_dev;
 353
 354        if (dev != loopback_dev) {
 355                if (idev && idev->dev == dev) {
 356                        struct inet6_dev *loopback_idev =
 357                                in6_dev_get(loopback_dev);
 358                        if (loopback_idev) {
 359                                rt->rt6i_idev = loopback_idev;
 360                                in6_dev_put(idev);
 361                        }
 362                }
 363                if (rt->n && rt->n->dev == dev) {
 364                        rt->n->dev = loopback_dev;
 365                        dev_hold(loopback_dev);
 366                        dev_put(dev);
 367                }
 368        }
 369}
 370
 371static bool rt6_check_expired(const struct rt6_info *rt)
 372{
 373        if (rt->rt6i_flags & RTF_EXPIRES) {
 374                if (time_after(jiffies, rt->dst.expires))
 375                        return true;
 376        } else if (rt->dst.from) {
 377                return rt6_check_expired((struct rt6_info *) rt->dst.from);
 378        }
 379        return false;
 380}
 381
 382static bool rt6_need_strict(const struct in6_addr *daddr)
 383{
 384        return ipv6_addr_type(daddr) &
 385                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 386}
 387
 388/*
 389 *      Route lookup. Any table->tb6_lock is implied.
 390 */
 391
 392static inline struct rt6_info *rt6_device_match(struct net *net,
 393                                                    struct rt6_info *rt,
 394                                                    const struct in6_addr *saddr,
 395                                                    int oif,
 396                                                    int flags)
 397{
 398        struct rt6_info *local = NULL;
 399        struct rt6_info *sprt;
 400
 401        if (!oif && ipv6_addr_any(saddr))
 402                goto out;
 403
 404        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 405                struct net_device *dev = sprt->dst.dev;
 406
 407                if (oif) {
 408                        if (dev->ifindex == oif)
 409                                return sprt;
 410                        if (dev->flags & IFF_LOOPBACK) {
 411                                if (!sprt->rt6i_idev ||
 412                                    sprt->rt6i_idev->dev->ifindex != oif) {
 413                                        if (flags & RT6_LOOKUP_F_IFACE && oif)
 414                                                continue;
 415                                        if (local && (!oif ||
 416                                                      local->rt6i_idev->dev->ifindex == oif))
 417                                                continue;
 418                                }
 419                                local = sprt;
 420                        }
 421                } else {
 422                        if (ipv6_chk_addr(net, saddr, dev,
 423                                          flags & RT6_LOOKUP_F_IFACE))
 424                                return sprt;
 425                }
 426        }
 427
 428        if (oif) {
 429                if (local)
 430                        return local;
 431
 432                if (flags & RT6_LOOKUP_F_IFACE)
 433                        return net->ipv6.ip6_null_entry;
 434        }
 435out:
 436        return rt;
 437}
 438
 439#ifdef CONFIG_IPV6_ROUTER_PREF
 440static void rt6_probe(struct rt6_info *rt)
 441{
 442        struct neighbour *neigh;
 443        /*
 444         * Okay, this does not seem to be appropriate
 445         * for now, however, we need to check if it
 446         * is really so; aka Router Reachability Probing.
 447         *
 448         * Router Reachability Probe MUST be rate-limited
 449         * to no more than one per minute.
 450         */
 451        neigh = rt ? rt->n : NULL;
 452        if (!neigh || (neigh->nud_state & NUD_VALID))
 453                return;
 454        read_lock_bh(&neigh->lock);
 455        if (!(neigh->nud_state & NUD_VALID) &&
 456            time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 457                struct in6_addr mcaddr;
 458                struct in6_addr *target;
 459
 460                neigh->updated = jiffies;
 461                read_unlock_bh(&neigh->lock);
 462
 463                target = (struct in6_addr *)&neigh->primary_key;
 464                addrconf_addr_solict_mult(target, &mcaddr);
 465                ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 466        } else {
 467                read_unlock_bh(&neigh->lock);
 468        }
 469}
 470#else
 471static inline void rt6_probe(struct rt6_info *rt)
 472{
 473}
 474#endif
 475
 476/*
 477 * Default Router Selection (RFC 2461 6.3.6)
 478 */
 479static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 480{
 481        struct net_device *dev = rt->dst.dev;
 482        if (!oif || dev->ifindex == oif)
 483                return 2;
 484        if ((dev->flags & IFF_LOOPBACK) &&
 485            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 486                return 1;
 487        return 0;
 488}
 489
 490static inline int rt6_check_neigh(struct rt6_info *rt)
 491{
 492        struct neighbour *neigh;
 493        int m;
 494
 495        neigh = rt->n;
 496        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 497            !(rt->rt6i_flags & RTF_GATEWAY))
 498                m = 1;
 499        else if (neigh) {
 500                read_lock_bh(&neigh->lock);
 501                if (neigh->nud_state & NUD_VALID)
 502                        m = 2;
 503#ifdef CONFIG_IPV6_ROUTER_PREF
 504                else if (neigh->nud_state & NUD_FAILED)
 505                        m = 0;
 506#endif
 507                else
 508                        m = 1;
 509                read_unlock_bh(&neigh->lock);
 510        } else
 511                m = 0;
 512        return m;
 513}
 514
 515static int rt6_score_route(struct rt6_info *rt, int oif,
 516                           int strict)
 517{
 518        int m, n;
 519
 520        m = rt6_check_dev(rt, oif);
 521        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 522                return -1;
 523#ifdef CONFIG_IPV6_ROUTER_PREF
 524        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 525#endif
 526        n = rt6_check_neigh(rt);
 527        if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 528                return -1;
 529        return m;
 530}
 531
 532static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 533                                   int *mpri, struct rt6_info *match)
 534{
 535        int m;
 536
 537        if (rt6_check_expired(rt))
 538                goto out;
 539
 540        m = rt6_score_route(rt, oif, strict);
 541        if (m < 0)
 542                goto out;
 543
 544        if (m > *mpri) {
 545                if (strict & RT6_LOOKUP_F_REACHABLE)
 546                        rt6_probe(match);
 547                *mpri = m;
 548                match = rt;
 549        } else if (strict & RT6_LOOKUP_F_REACHABLE) {
 550                rt6_probe(rt);
 551        }
 552
 553out:
 554        return match;
 555}
 556
 557static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 558                                     struct rt6_info *rr_head,
 559                                     u32 metric, int oif, int strict)
 560{
 561        struct rt6_info *rt, *match;
 562        int mpri = -1;
 563
 564        match = NULL;
 565        for (rt = rr_head; rt && rt->rt6i_metric == metric;
 566             rt = rt->dst.rt6_next)
 567                match = find_match(rt, oif, strict, &mpri, match);
 568        for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 569             rt = rt->dst.rt6_next)
 570                match = find_match(rt, oif, strict, &mpri, match);
 571
 572        return match;
 573}
 574
 575static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 576{
 577        struct rt6_info *match, *rt0;
 578        struct net *net;
 579
 580        rt0 = fn->rr_ptr;
 581        if (!rt0)
 582                fn->rr_ptr = rt0 = fn->leaf;
 583
 584        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 585
 586        if (!match &&
 587            (strict & RT6_LOOKUP_F_REACHABLE)) {
 588                struct rt6_info *next = rt0->dst.rt6_next;
 589
 590                /* no entries matched; do round-robin */
 591                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 592                        next = fn->leaf;
 593
 594                if (next != rt0)
 595                        fn->rr_ptr = next;
 596        }
 597
 598        net = dev_net(rt0->dst.dev);
 599        return match ? match : net->ipv6.ip6_null_entry;
 600}
 601
 602#ifdef CONFIG_IPV6_ROUTE_INFO
 603int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 604                  const struct in6_addr *gwaddr)
 605{
 606        struct net *net = dev_net(dev);
 607        struct route_info *rinfo = (struct route_info *) opt;
 608        struct in6_addr prefix_buf, *prefix;
 609        unsigned int pref;
 610        unsigned long lifetime;
 611        struct rt6_info *rt;
 612
 613        if (len < sizeof(struct route_info)) {
 614                return -EINVAL;
 615        }
 616
 617        /* Sanity check for prefix_len and length */
 618        if (rinfo->length > 3) {
 619                return -EINVAL;
 620        } else if (rinfo->prefix_len > 128) {
 621                return -EINVAL;
 622        } else if (rinfo->prefix_len > 64) {
 623                if (rinfo->length < 2) {
 624                        return -EINVAL;
 625                }
 626        } else if (rinfo->prefix_len > 0) {
 627                if (rinfo->length < 1) {
 628                        return -EINVAL;
 629                }
 630        }
 631
 632        pref = rinfo->route_pref;
 633        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 634                return -EINVAL;
 635
 636        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 637
 638        if (rinfo->length == 3)
 639                prefix = (struct in6_addr *)rinfo->prefix;
 640        else {
 641                /* this function is safe */
 642                ipv6_addr_prefix(&prefix_buf,
 643                                 (struct in6_addr *)rinfo->prefix,
 644                                 rinfo->prefix_len);
 645                prefix = &prefix_buf;
 646        }
 647
 648        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 649                                dev->ifindex);
 650
 651        if (rt && !lifetime) {
 652                ip6_del_rt(rt);
 653                rt = NULL;
 654        }
 655
 656        if (!rt && lifetime)
 657                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 658                                        pref);
 659        else if (rt)
 660                rt->rt6i_flags = RTF_ROUTEINFO |
 661                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 662
 663        if (rt) {
 664                if (!addrconf_finite_timeout(lifetime))
 665                        rt6_clean_expires(rt);
 666                else
 667                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 668
 669                dst_release(&rt->dst);
 670        }
 671        return 0;
 672}
 673#endif
 674
 675#define BACKTRACK(__net, saddr)                 \
 676do { \
 677        if (rt == __net->ipv6.ip6_null_entry) { \
 678                struct fib6_node *pn; \
 679                while (1) { \
 680                        if (fn->fn_flags & RTN_TL_ROOT) \
 681                                goto out; \
 682                        pn = fn->parent; \
 683                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 684                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 685                        else \
 686                                fn = pn; \
 687                        if (fn->fn_flags & RTN_RTINFO) \
 688                                goto restart; \
 689                } \
 690        } \
 691} while (0)
 692
 693static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 694                                             struct fib6_table *table,
 695                                             struct flowi6 *fl6, int flags)
 696{
 697        struct fib6_node *fn;
 698        struct rt6_info *rt;
 699
 700        read_lock_bh(&table->tb6_lock);
 701        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 702restart:
 703        rt = fn->leaf;
 704        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 705        BACKTRACK(net, &fl6->saddr);
 706out:
 707        dst_use(&rt->dst, jiffies);
 708        read_unlock_bh(&table->tb6_lock);
 709        return rt;
 710
 711}
 712
 713struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 714                                    int flags)
 715{
 716        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 717}
 718EXPORT_SYMBOL_GPL(ip6_route_lookup);
 719
 720struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 721                            const struct in6_addr *saddr, int oif, int strict)
 722{
 723        struct flowi6 fl6 = {
 724                .flowi6_oif = oif,
 725                .daddr = *daddr,
 726        };
 727        struct dst_entry *dst;
 728        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 729
 730        if (saddr) {
 731                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 732                flags |= RT6_LOOKUP_F_HAS_SADDR;
 733        }
 734
 735        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 736        if (dst->error == 0)
 737                return (struct rt6_info *) dst;
 738
 739        dst_release(dst);
 740
 741        return NULL;
 742}
 743
 744EXPORT_SYMBOL(rt6_lookup);
 745
 746/* ip6_ins_rt is called with FREE table->tb6_lock.
 747   It takes new route entry, the addition fails by any reason the
 748   route is freed. In any case, if caller does not hold it, it may
 749   be destroyed.
 750 */
 751
 752static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 753{
 754        int err;
 755        struct fib6_table *table;
 756
 757        table = rt->rt6i_table;
 758        write_lock_bh(&table->tb6_lock);
 759        err = fib6_add(&table->tb6_root, rt, info);
 760        write_unlock_bh(&table->tb6_lock);
 761
 762        return err;
 763}
 764
 765int ip6_ins_rt(struct rt6_info *rt)
 766{
 767        struct nl_info info = {
 768                .nl_net = dev_net(rt->dst.dev),
 769        };
 770        return __ip6_ins_rt(rt, &info);
 771}
 772
 773static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 774                                      const struct in6_addr *daddr,
 775                                      const struct in6_addr *saddr)
 776{
 777        struct rt6_info *rt;
 778
 779        /*
 780         *      Clone the route.
 781         */
 782
 783        rt = ip6_rt_copy(ort, daddr);
 784
 785        if (rt) {
 786                int attempts = !in_softirq();
 787
 788                if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 789                        if (ort->rt6i_dst.plen != 128 &&
 790                            ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 791                                rt->rt6i_flags |= RTF_ANYCAST;
 792                        rt->rt6i_gateway = *daddr;
 793                }
 794
 795                rt->rt6i_flags |= RTF_CACHE;
 796
 797#ifdef CONFIG_IPV6_SUBTREES
 798                if (rt->rt6i_src.plen && saddr) {
 799                        rt->rt6i_src.addr = *saddr;
 800                        rt->rt6i_src.plen = 128;
 801                }
 802#endif
 803
 804        retry:
 805                if (rt6_bind_neighbour(rt, rt->dst.dev)) {
 806                        struct net *net = dev_net(rt->dst.dev);
 807                        int saved_rt_min_interval =
 808                                net->ipv6.sysctl.ip6_rt_gc_min_interval;
 809                        int saved_rt_elasticity =
 810                                net->ipv6.sysctl.ip6_rt_gc_elasticity;
 811
 812                        if (attempts-- > 0) {
 813                                net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 814                                net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 815
 816                                ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 817
 818                                net->ipv6.sysctl.ip6_rt_gc_elasticity =
 819                                        saved_rt_elasticity;
 820                                net->ipv6.sysctl.ip6_rt_gc_min_interval =
 821                                        saved_rt_min_interval;
 822                                goto retry;
 823                        }
 824
 825                        net_warn_ratelimited("Neighbour table overflow\n");
 826                        dst_free(&rt->dst);
 827                        return NULL;
 828                }
 829        }
 830
 831        return rt;
 832}
 833
 834static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 835                                        const struct in6_addr *daddr)
 836{
 837        struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 838
 839        if (rt) {
 840                rt->rt6i_flags |= RTF_CACHE;
 841                rt->n = neigh_clone(ort->n);
 842        }
 843        return rt;
 844}
 845
 846static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 847                                      struct flowi6 *fl6, int flags)
 848{
 849        struct fib6_node *fn;
 850        struct rt6_info *rt, *nrt;
 851        int strict = 0;
 852        int attempts = 3;
 853        int err;
 854        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 855
 856        strict |= flags & RT6_LOOKUP_F_IFACE;
 857
 858relookup:
 859        read_lock_bh(&table->tb6_lock);
 860
 861restart_2:
 862        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 863
 864restart:
 865        rt = rt6_select(fn, oif, strict | reachable);
 866
 867        BACKTRACK(net, &fl6->saddr);
 868        if (rt == net->ipv6.ip6_null_entry ||
 869            rt->rt6i_flags & RTF_CACHE)
 870                goto out;
 871
 872        dst_hold(&rt->dst);
 873        read_unlock_bh(&table->tb6_lock);
 874
 875        if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
 876                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 877        else if (!(rt->dst.flags & DST_HOST))
 878                nrt = rt6_alloc_clone(rt, &fl6->daddr);
 879        else
 880                goto out2;
 881
 882        dst_release(&rt->dst);
 883        rt = nrt ? : net->ipv6.ip6_null_entry;
 884
 885        dst_hold(&rt->dst);
 886        if (nrt) {
 887                err = ip6_ins_rt(nrt);
 888                if (!err)
 889                        goto out2;
 890        }
 891
 892        if (--attempts <= 0)
 893                goto out2;
 894
 895        /*
 896         * Race condition! In the gap, when table->tb6_lock was
 897         * released someone could insert this route.  Relookup.
 898         */
 899        dst_release(&rt->dst);
 900        goto relookup;
 901
 902out:
 903        if (reachable) {
 904                reachable = 0;
 905                goto restart_2;
 906        }
 907        dst_hold(&rt->dst);
 908        read_unlock_bh(&table->tb6_lock);
 909out2:
 910        rt->dst.lastuse = jiffies;
 911        rt->dst.__use++;
 912
 913        return rt;
 914}
 915
 916static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 917                                            struct flowi6 *fl6, int flags)
 918{
 919        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 920}
 921
 922static struct dst_entry *ip6_route_input_lookup(struct net *net,
 923                                                struct net_device *dev,
 924                                                struct flowi6 *fl6, int flags)
 925{
 926        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 927                flags |= RT6_LOOKUP_F_IFACE;
 928
 929        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 930}
 931
 932void ip6_route_input(struct sk_buff *skb)
 933{
 934        const struct ipv6hdr *iph = ipv6_hdr(skb);
 935        struct net *net = dev_net(skb->dev);
 936        int flags = RT6_LOOKUP_F_HAS_SADDR;
 937        struct flowi6 fl6 = {
 938                .flowi6_iif = skb->dev->ifindex,
 939                .daddr = iph->daddr,
 940                .saddr = iph->saddr,
 941                .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
 942                .flowi6_mark = skb->mark,
 943                .flowi6_proto = iph->nexthdr,
 944        };
 945
 946        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 947}
 948
 949static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 950                                             struct flowi6 *fl6, int flags)
 951{
 952        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 953}
 954
 955struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 956                                    struct flowi6 *fl6)
 957{
 958        int flags = 0;
 959
 960        fl6->flowi6_iif = LOOPBACK_IFINDEX;
 961
 962        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 963                flags |= RT6_LOOKUP_F_IFACE;
 964
 965        if (!ipv6_addr_any(&fl6->saddr))
 966                flags |= RT6_LOOKUP_F_HAS_SADDR;
 967        else if (sk)
 968                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 969
 970        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 971}
 972
 973EXPORT_SYMBOL(ip6_route_output);
 974
 975struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 976{
 977        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 978        struct dst_entry *new = NULL;
 979
 980        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
 981        if (rt) {
 982                new = &rt->dst;
 983
 984                memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
 985                rt6_init_peer(rt, net->ipv6.peers);
 986
 987                new->__use = 1;
 988                new->input = dst_discard;
 989                new->output = dst_discard;
 990
 991                if (dst_metrics_read_only(&ort->dst))
 992                        new->_metrics = ort->dst._metrics;
 993                else
 994                        dst_copy_metrics(new, &ort->dst);
 995                rt->rt6i_idev = ort->rt6i_idev;
 996                if (rt->rt6i_idev)
 997                        in6_dev_hold(rt->rt6i_idev);
 998
 999                rt->rt6i_gateway = ort->rt6i_gateway;
1000                rt->rt6i_flags = ort->rt6i_flags;
1001                rt6_clean_expires(rt);
1002                rt->rt6i_metric = 0;
1003
1004                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1005#ifdef CONFIG_IPV6_SUBTREES
1006                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1007#endif
1008
1009                dst_free(new);
1010        }
1011
1012        dst_release(dst_orig);
1013        return new ? new : ERR_PTR(-ENOMEM);
1014}
1015
1016/*
1017 *      Destination cache support functions
1018 */
1019
1020static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1021{
1022        struct rt6_info *rt;
1023
1024        rt = (struct rt6_info *) dst;
1025
1026        /* All IPV6 dsts are created with ->obsolete set to the value
1027         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1028         * into this function always.
1029         */
1030        if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1031                return NULL;
1032
1033        if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1034                if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1035                        if (!rt6_has_peer(rt))
1036                                rt6_bind_peer(rt, 0);
1037                        rt->rt6i_peer_genid = rt6_peer_genid();
1038                }
1039                return dst;
1040        }
1041        return NULL;
1042}
1043
1044static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1045{
1046        struct rt6_info *rt = (struct rt6_info *) dst;
1047
1048        if (rt) {
1049                if (rt->rt6i_flags & RTF_CACHE) {
1050                        if (rt6_check_expired(rt)) {
1051                                ip6_del_rt(rt);
1052                                dst = NULL;
1053                        }
1054                } else {
1055                        dst_release(dst);
1056                        dst = NULL;
1057                }
1058        }
1059        return dst;
1060}
1061
1062static void ip6_link_failure(struct sk_buff *skb)
1063{
1064        struct rt6_info *rt;
1065
1066        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1067
1068        rt = (struct rt6_info *) skb_dst(skb);
1069        if (rt) {
1070                if (rt->rt6i_flags & RTF_CACHE)
1071                        rt6_update_expires(rt, 0);
1072                else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1073                        rt->rt6i_node->fn_sernum = -1;
1074        }
1075}
1076
1077static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1078                               struct sk_buff *skb, u32 mtu)
1079{
1080        struct rt6_info *rt6 = (struct rt6_info*)dst;
1081
1082        dst_confirm(dst);
1083        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1084                struct net *net = dev_net(dst->dev);
1085
1086                rt6->rt6i_flags |= RTF_MODIFIED;
1087                if (mtu < IPV6_MIN_MTU) {
1088                        u32 features = dst_metric(dst, RTAX_FEATURES);
1089                        mtu = IPV6_MIN_MTU;
1090                        features |= RTAX_FEATURE_ALLFRAG;
1091                        dst_metric_set(dst, RTAX_FEATURES, features);
1092                }
1093                dst_metric_set(dst, RTAX_MTU, mtu);
1094                rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1095        }
1096}
1097
1098void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1099                     int oif, u32 mark)
1100{
1101        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1102        struct dst_entry *dst;
1103        struct flowi6 fl6;
1104
1105        memset(&fl6, 0, sizeof(fl6));
1106        fl6.flowi6_oif = oif;
1107        fl6.flowi6_mark = mark;
1108        fl6.flowi6_flags = 0;
1109        fl6.daddr = iph->daddr;
1110        fl6.saddr = iph->saddr;
1111        fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1112
1113        dst = ip6_route_output(net, NULL, &fl6);
1114        if (!dst->error)
1115                ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1116        dst_release(dst);
1117}
1118EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1119
1120void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1121{
1122        ip6_update_pmtu(skb, sock_net(sk), mtu,
1123                        sk->sk_bound_dev_if, sk->sk_mark);
1124}
1125EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1126
1127void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1128{
1129        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1130        struct dst_entry *dst;
1131        struct flowi6 fl6;
1132
1133        memset(&fl6, 0, sizeof(fl6));
1134        fl6.flowi6_oif = oif;
1135        fl6.flowi6_mark = mark;
1136        fl6.flowi6_flags = 0;
1137        fl6.daddr = iph->daddr;
1138        fl6.saddr = iph->saddr;
1139        fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1140
1141        dst = ip6_route_output(net, NULL, &fl6);
1142        if (!dst->error)
1143                rt6_do_redirect(dst, NULL, skb);
1144        dst_release(dst);
1145}
1146EXPORT_SYMBOL_GPL(ip6_redirect);
1147
1148void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1149{
1150        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1151}
1152EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1153
1154static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1155{
1156        struct net_device *dev = dst->dev;
1157        unsigned int mtu = dst_mtu(dst);
1158        struct net *net = dev_net(dev);
1159
1160        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1161
1162        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1163                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1164
1165        /*
1166         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1167         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1168         * IPV6_MAXPLEN is also valid and means: "any MSS,
1169         * rely only on pmtu discovery"
1170         */
1171        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1172                mtu = IPV6_MAXPLEN;
1173        return mtu;
1174}
1175
1176static unsigned int ip6_mtu(const struct dst_entry *dst)
1177{
1178        struct inet6_dev *idev;
1179        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1180
1181        if (mtu)
1182                return mtu;
1183
1184        mtu = IPV6_MIN_MTU;
1185
1186        rcu_read_lock();
1187        idev = __in6_dev_get(dst->dev);
1188        if (idev)
1189                mtu = idev->cnf.mtu6;
1190        rcu_read_unlock();
1191
1192        return mtu;
1193}
1194
1195static struct dst_entry *icmp6_dst_gc_list;
1196static DEFINE_SPINLOCK(icmp6_dst_lock);
1197
1198struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1199                                  struct neighbour *neigh,
1200                                  struct flowi6 *fl6)
1201{
1202        struct dst_entry *dst;
1203        struct rt6_info *rt;
1204        struct inet6_dev *idev = in6_dev_get(dev);
1205        struct net *net = dev_net(dev);
1206
1207        if (unlikely(!idev))
1208                return ERR_PTR(-ENODEV);
1209
1210        rt = ip6_dst_alloc(net, dev, 0, NULL);
1211        if (unlikely(!rt)) {
1212                in6_dev_put(idev);
1213                dst = ERR_PTR(-ENOMEM);
1214                goto out;
1215        }
1216
1217        if (neigh)
1218                neigh_hold(neigh);
1219        else {
1220                neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1221                if (IS_ERR(neigh)) {
1222                        in6_dev_put(idev);
1223                        dst_free(&rt->dst);
1224                        return ERR_CAST(neigh);
1225                }
1226        }
1227
1228        rt->dst.flags |= DST_HOST;
1229        rt->dst.output  = ip6_output;
1230        rt->n = neigh;
1231        atomic_set(&rt->dst.__refcnt, 1);
1232        rt->rt6i_dst.addr = fl6->daddr;
1233        rt->rt6i_dst.plen = 128;
1234        rt->rt6i_idev     = idev;
1235        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1236
1237        spin_lock_bh(&icmp6_dst_lock);
1238        rt->dst.next = icmp6_dst_gc_list;
1239        icmp6_dst_gc_list = &rt->dst;
1240        spin_unlock_bh(&icmp6_dst_lock);
1241
1242        fib6_force_start_gc(net);
1243
1244        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1245
1246out:
1247        return dst;
1248}
1249
1250int icmp6_dst_gc(void)
1251{
1252        struct dst_entry *dst, **pprev;
1253        int more = 0;
1254
1255        spin_lock_bh(&icmp6_dst_lock);
1256        pprev = &icmp6_dst_gc_list;
1257
1258        while ((dst = *pprev) != NULL) {
1259                if (!atomic_read(&dst->__refcnt)) {
1260                        *pprev = dst->next;
1261                        dst_free(dst);
1262                } else {
1263                        pprev = &dst->next;
1264                        ++more;
1265                }
1266        }
1267
1268        spin_unlock_bh(&icmp6_dst_lock);
1269
1270        return more;
1271}
1272
1273static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1274                            void *arg)
1275{
1276        struct dst_entry *dst, **pprev;
1277
1278        spin_lock_bh(&icmp6_dst_lock);
1279        pprev = &icmp6_dst_gc_list;
1280        while ((dst = *pprev) != NULL) {
1281                struct rt6_info *rt = (struct rt6_info *) dst;
1282                if (func(rt, arg)) {
1283                        *pprev = dst->next;
1284                        dst_free(dst);
1285                } else {
1286                        pprev = &dst->next;
1287                }
1288        }
1289        spin_unlock_bh(&icmp6_dst_lock);
1290}
1291
1292static int ip6_dst_gc(struct dst_ops *ops)
1293{
1294        unsigned long now = jiffies;
1295        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1296        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1297        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1298        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1299        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1300        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1301        int entries;
1302
1303        entries = dst_entries_get_fast(ops);
1304        if (time_after(rt_last_gc + rt_min_interval, now) &&
1305            entries <= rt_max_size)
1306                goto out;
1307
1308        net->ipv6.ip6_rt_gc_expire++;
1309        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1310        net->ipv6.ip6_rt_last_gc = now;
1311        entries = dst_entries_get_slow(ops);
1312        if (entries < ops->gc_thresh)
1313                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1314out:
1315        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1316        return entries > rt_max_size;
1317}
1318
/* Clean host part of a prefix. Not necessary in radix tree,
 * but results in cleaner routing tables.
 *
 * Remove it only when everything works!
 */
1324
1325int ip6_dst_hoplimit(struct dst_entry *dst)
1326{
1327        int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1328        if (hoplimit == 0) {
1329                struct net_device *dev = dst->dev;
1330                struct inet6_dev *idev;
1331
1332                rcu_read_lock();
1333                idev = __in6_dev_get(dev);
1334                if (idev)
1335                        hoplimit = idev->cnf.hop_limit;
1336                else
1337                        hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1338                rcu_read_unlock();
1339        }
1340        return hoplimit;
1341}
1342EXPORT_SYMBOL(ip6_dst_hoplimit);
1343
1344/*
1345 *
1346 */
1347
1348int ip6_route_add(struct fib6_config *cfg)
1349{
1350        int err;
1351        struct net *net = cfg->fc_nlinfo.nl_net;
1352        struct rt6_info *rt = NULL;
1353        struct net_device *dev = NULL;
1354        struct inet6_dev *idev = NULL;
1355        struct fib6_table *table;
1356        int addr_type;
1357
1358        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1359                return -EINVAL;
1360#ifndef CONFIG_IPV6_SUBTREES
1361        if (cfg->fc_src_len)
1362                return -EINVAL;
1363#endif
1364        if (cfg->fc_ifindex) {
1365                err = -ENODEV;
1366                dev = dev_get_by_index(net, cfg->fc_ifindex);
1367                if (!dev)
1368                        goto out;
1369                idev = in6_dev_get(dev);
1370                if (!idev)
1371                        goto out;
1372        }
1373
1374        if (cfg->fc_metric == 0)
1375                cfg->fc_metric = IP6_RT_PRIO_USER;
1376
1377        err = -ENOBUFS;
1378        if (cfg->fc_nlinfo.nlh &&
1379            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1380                table = fib6_get_table(net, cfg->fc_table);
1381                if (!table) {
1382                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1383                        table = fib6_new_table(net, cfg->fc_table);
1384                }
1385        } else {
1386                table = fib6_new_table(net, cfg->fc_table);
1387        }
1388
1389        if (!table)
1390                goto out;
1391
1392        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1393
1394        if (!rt) {
1395                err = -ENOMEM;
1396                goto out;
1397        }
1398
1399        if (cfg->fc_flags & RTF_EXPIRES)
1400                rt6_set_expires(rt, jiffies +
1401                                clock_t_to_jiffies(cfg->fc_expires));
1402        else
1403                rt6_clean_expires(rt);
1404
1405        if (cfg->fc_protocol == RTPROT_UNSPEC)
1406                cfg->fc_protocol = RTPROT_BOOT;
1407        rt->rt6i_protocol = cfg->fc_protocol;
1408
1409        addr_type = ipv6_addr_type(&cfg->fc_dst);
1410
1411        if (addr_type & IPV6_ADDR_MULTICAST)
1412                rt->dst.input = ip6_mc_input;
1413        else if (cfg->fc_flags & RTF_LOCAL)
1414                rt->dst.input = ip6_input;
1415        else
1416                rt->dst.input = ip6_forward;
1417
1418        rt->dst.output = ip6_output;
1419
1420        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1421        rt->rt6i_dst.plen = cfg->fc_dst_len;
1422        if (rt->rt6i_dst.plen == 128)
1423               rt->dst.flags |= DST_HOST;
1424
1425        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1426                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1427                if (!metrics) {
1428                        err = -ENOMEM;
1429                        goto out;
1430                }
1431                dst_init_metrics(&rt->dst, metrics, 0);
1432        }
1433#ifdef CONFIG_IPV6_SUBTREES
1434        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1435        rt->rt6i_src.plen = cfg->fc_src_len;
1436#endif
1437
1438        rt->rt6i_metric = cfg->fc_metric;
1439
1440        /* We cannot add true routes via loopback here,
1441           they would result in kernel looping; promote them to reject routes
1442         */
1443        if ((cfg->fc_flags & RTF_REJECT) ||
1444            (dev && (dev->flags & IFF_LOOPBACK) &&
1445             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1446             !(cfg->fc_flags & RTF_LOCAL))) {
1447                /* hold loopback dev/idev if we haven't done so. */
1448                if (dev != net->loopback_dev) {
1449                        if (dev) {
1450                                dev_put(dev);
1451                                in6_dev_put(idev);
1452                        }
1453                        dev = net->loopback_dev;
1454                        dev_hold(dev);
1455                        idev = in6_dev_get(dev);
1456                        if (!idev) {
1457                                err = -ENODEV;
1458                                goto out;
1459                        }
1460                }
1461                rt->dst.output = ip6_pkt_discard_out;
1462                rt->dst.input = ip6_pkt_discard;
1463                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1464                switch (cfg->fc_type) {
1465                case RTN_BLACKHOLE:
1466                        rt->dst.error = -EINVAL;
1467                        break;
1468                case RTN_PROHIBIT:
1469                        rt->dst.error = -EACCES;
1470                        break;
1471                case RTN_THROW:
1472                        rt->dst.error = -EAGAIN;
1473                        break;
1474                default:
1475                        rt->dst.error = -ENETUNREACH;
1476                        break;
1477                }
1478                goto install_route;
1479        }
1480
1481        if (cfg->fc_flags & RTF_GATEWAY) {
1482                const struct in6_addr *gw_addr;
1483                int gwa_type;
1484
1485                gw_addr = &cfg->fc_gateway;
1486                rt->rt6i_gateway = *gw_addr;
1487                gwa_type = ipv6_addr_type(gw_addr);
1488
1489                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1490                        struct rt6_info *grt;
1491
1492                        /* IPv6 strictly inhibits using not link-local
1493                           addresses as nexthop address.
1494                           Otherwise, router will not able to send redirects.
1495                           It is very good, but in some (rare!) circumstances
1496                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1497                           some exceptions. --ANK
1498                         */
1499                        err = -EINVAL;
1500                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1501                                goto out;
1502
1503                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1504
1505                        err = -EHOSTUNREACH;
1506                        if (!grt)
1507                                goto out;
1508                        if (dev) {
1509                                if (dev != grt->dst.dev) {
1510                                        dst_release(&grt->dst);
1511                                        goto out;
1512                                }
1513                        } else {
1514                                dev = grt->dst.dev;
1515                                idev = grt->rt6i_idev;
1516                                dev_hold(dev);
1517                                in6_dev_hold(grt->rt6i_idev);
1518                        }
1519                        if (!(grt->rt6i_flags & RTF_GATEWAY))
1520                                err = 0;
1521                        dst_release(&grt->dst);
1522
1523                        if (err)
1524                                goto out;
1525                }
1526                err = -EINVAL;
1527                if (!dev || (dev->flags & IFF_LOOPBACK))
1528                        goto out;
1529        }
1530
1531        err = -ENODEV;
1532        if (!dev)
1533                goto out;
1534
1535        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1536                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1537                        err = -EINVAL;
1538                        goto out;
1539                }
1540                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1541                rt->rt6i_prefsrc.plen = 128;
1542        } else
1543                rt->rt6i_prefsrc.plen = 0;
1544
1545        if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1546                err = rt6_bind_neighbour(rt, dev);
1547                if (err)
1548                        goto out;
1549        }
1550
1551        rt->rt6i_flags = cfg->fc_flags;
1552
1553install_route:
1554        if (cfg->fc_mx) {
1555                struct nlattr *nla;
1556                int remaining;
1557
1558                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1559                        int type = nla_type(nla);
1560
1561                        if (type) {
1562                                if (type > RTAX_MAX) {
1563                                        err = -EINVAL;
1564                                        goto out;
1565                                }
1566
1567                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1568                        }
1569                }
1570        }
1571
1572        rt->dst.dev = dev;
1573        rt->rt6i_idev = idev;
1574        rt->rt6i_table = table;
1575
1576        cfg->fc_nlinfo.nl_net = dev_net(dev);
1577
1578        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1579
1580out:
1581        if (dev)
1582                dev_put(dev);
1583        if (idev)
1584                in6_dev_put(idev);
1585        if (rt)
1586                dst_free(&rt->dst);
1587        return err;
1588}
1589
1590static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1591{
1592        int err;
1593        struct fib6_table *table;
1594        struct net *net = dev_net(rt->dst.dev);
1595
1596        if (rt == net->ipv6.ip6_null_entry) {
1597                err = -ENOENT;
1598                goto out;
1599        }
1600
1601        table = rt->rt6i_table;
1602        write_lock_bh(&table->tb6_lock);
1603        err = fib6_del(rt, info);
1604        write_unlock_bh(&table->tb6_lock);
1605
1606out:
1607        dst_release(&rt->dst);
1608        return err;
1609}
1610
1611int ip6_del_rt(struct rt6_info *rt)
1612{
1613        struct nl_info info = {
1614                .nl_net = dev_net(rt->dst.dev),
1615        };
1616        return __ip6_del_rt(rt, &info);
1617}
1618
1619static int ip6_route_del(struct fib6_config *cfg)
1620{
1621        struct fib6_table *table;
1622        struct fib6_node *fn;
1623        struct rt6_info *rt;
1624        int err = -ESRCH;
1625
1626        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1627        if (!table)
1628                return err;
1629
1630        read_lock_bh(&table->tb6_lock);
1631
1632        fn = fib6_locate(&table->tb6_root,
1633                         &cfg->fc_dst, cfg->fc_dst_len,
1634                         &cfg->fc_src, cfg->fc_src_len);
1635
1636        if (fn) {
1637                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1638                        if (cfg->fc_ifindex &&
1639                            (!rt->dst.dev ||
1640                             rt->dst.dev->ifindex != cfg->fc_ifindex))
1641                                continue;
1642                        if (cfg->fc_flags & RTF_GATEWAY &&
1643                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1644                                continue;
1645                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1646                                continue;
1647                        dst_hold(&rt->dst);
1648                        read_unlock_bh(&table->tb6_lock);
1649
1650                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1651                }
1652        }
1653        read_unlock_bh(&table->tb6_lock);
1654
1655        return err;
1656}
1657
1658static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1659{
1660        struct net *net = dev_net(skb->dev);
1661        struct netevent_redirect netevent;
1662        struct rt6_info *rt, *nrt = NULL;
1663        const struct in6_addr *target;
1664        struct ndisc_options ndopts;
1665        const struct in6_addr *dest;
1666        struct neighbour *old_neigh;
1667        struct inet6_dev *in6_dev;
1668        struct neighbour *neigh;
1669        struct icmp6hdr *icmph;
1670        int optlen, on_link;
1671        u8 *lladdr;
1672
1673        optlen = skb->tail - skb->transport_header;
1674        optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1675
1676        if (optlen < 0) {
1677                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1678                return;
1679        }
1680
1681        icmph = icmp6_hdr(skb);
1682        target = (const struct in6_addr *) (icmph + 1);
1683        dest = target + 1;
1684
1685        if (ipv6_addr_is_multicast(dest)) {
1686                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1687                return;
1688        }
1689
1690        on_link = 0;
1691        if (ipv6_addr_equal(dest, target)) {
1692                on_link = 1;
1693        } else if (ipv6_addr_type(target) !=
1694                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1695                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1696                return;
1697        }
1698
1699        in6_dev = __in6_dev_get(skb->dev);
1700        if (!in6_dev)
1701                return;
1702        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1703                return;
1704
1705        /* RFC2461 8.1:
1706         *      The IP source address of the Redirect MUST be the same as the current
1707         *      first-hop router for the specified ICMP Destination Address.
1708         */
1709
1710        if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1711                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1712                return;
1713        }
1714
1715        lladdr = NULL;
1716        if (ndopts.nd_opts_tgt_lladdr) {
1717                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1718                                             skb->dev);
1719                if (!lladdr) {
1720                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1721                        return;
1722                }
1723        }
1724
1725        rt = (struct rt6_info *) dst;
1726        if (rt == net->ipv6.ip6_null_entry) {
1727                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1728                return;
1729        }
1730
1731        /* Redirect received -> path was valid.
1732         * Look, redirects are sent only in response to data packets,
1733         * so that this nexthop apparently is reachable. --ANK
1734         */
1735        dst_confirm(&rt->dst);
1736
1737        neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1738        if (!neigh)
1739                return;
1740
1741        /* Duplicate redirect: silently ignore. */
1742        old_neigh = rt->n;
1743        if (neigh == old_neigh)
1744                goto out;
1745
1746        /*
1747         *      We have finally decided to accept it.
1748         */
1749
1750        neigh_update(neigh, lladdr, NUD_STALE,
1751                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1752                     NEIGH_UPDATE_F_OVERRIDE|
1753                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1754                                     NEIGH_UPDATE_F_ISROUTER))
1755                     );
1756
1757        nrt = ip6_rt_copy(rt, dest);
1758        if (!nrt)
1759                goto out;
1760
1761        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1762        if (on_link)
1763                nrt->rt6i_flags &= ~RTF_GATEWAY;
1764
1765        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1766        nrt->n = neigh_clone(neigh);
1767
1768        if (ip6_ins_rt(nrt))
1769                goto out;
1770
1771        netevent.old = &rt->dst;
1772        netevent.old_neigh = old_neigh;
1773        netevent.new = &nrt->dst;
1774        netevent.new_neigh = neigh;
1775        netevent.daddr = dest;
1776        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1777
1778        if (rt->rt6i_flags & RTF_CACHE) {
1779                rt = (struct rt6_info *) dst_clone(&rt->dst);
1780                ip6_del_rt(rt);
1781        }
1782
1783out:
1784        neigh_release(neigh);
1785}
1786
1787/*
1788 *      Misc support functions
1789 */
1790
1791static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1792                                    const struct in6_addr *dest)
1793{
1794        struct net *net = dev_net(ort->dst.dev);
1795        struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1796                                            ort->rt6i_table);
1797
1798        if (rt) {
1799                rt->dst.input = ort->dst.input;
1800                rt->dst.output = ort->dst.output;
1801                rt->dst.flags |= DST_HOST;
1802
1803                rt->rt6i_dst.addr = *dest;
1804                rt->rt6i_dst.plen = 128;
1805                dst_copy_metrics(&rt->dst, &ort->dst);
1806                rt->dst.error = ort->dst.error;
1807                rt->rt6i_idev = ort->rt6i_idev;
1808                if (rt->rt6i_idev)
1809                        in6_dev_hold(rt->rt6i_idev);
1810                rt->dst.lastuse = jiffies;
1811
1812                rt->rt6i_gateway = ort->rt6i_gateway;
1813                rt->rt6i_flags = ort->rt6i_flags;
1814                if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1815                    (RTF_DEFAULT | RTF_ADDRCONF))
1816                        rt6_set_from(rt, ort);
1817                else
1818                        rt6_clean_expires(rt);
1819                rt->rt6i_metric = 0;
1820
1821#ifdef CONFIG_IPV6_SUBTREES
1822                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1823#endif
1824                memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1825                rt->rt6i_table = ort->rt6i_table;
1826        }
1827        return rt;
1828}
1829
1830#ifdef CONFIG_IPV6_ROUTE_INFO
1831static struct rt6_info *rt6_get_route_info(struct net *net,
1832                                           const struct in6_addr *prefix, int prefixlen,
1833                                           const struct in6_addr *gwaddr, int ifindex)
1834{
1835        struct fib6_node *fn;
1836        struct rt6_info *rt = NULL;
1837        struct fib6_table *table;
1838
1839        table = fib6_get_table(net, RT6_TABLE_INFO);
1840        if (!table)
1841                return NULL;
1842
1843        read_lock_bh(&table->tb6_lock);
1844        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1845        if (!fn)
1846                goto out;
1847
1848        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1849                if (rt->dst.dev->ifindex != ifindex)
1850                        continue;
1851                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1852                        continue;
1853                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1854                        continue;
1855                dst_hold(&rt->dst);
1856                break;
1857        }
1858out:
1859        read_unlock_bh(&table->tb6_lock);
1860        return rt;
1861}
1862
1863static struct rt6_info *rt6_add_route_info(struct net *net,
1864                                           const struct in6_addr *prefix, int prefixlen,
1865                                           const struct in6_addr *gwaddr, int ifindex,
1866                                           unsigned int pref)
1867{
1868        struct fib6_config cfg = {
1869                .fc_table       = RT6_TABLE_INFO,
1870                .fc_metric      = IP6_RT_PRIO_USER,
1871                .fc_ifindex     = ifindex,
1872                .fc_dst_len     = prefixlen,
1873                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1874                                  RTF_UP | RTF_PREF(pref),
1875                .fc_nlinfo.portid = 0,
1876                .fc_nlinfo.nlh = NULL,
1877                .fc_nlinfo.nl_net = net,
1878        };
1879
1880        cfg.fc_dst = *prefix;
1881        cfg.fc_gateway = *gwaddr;
1882
1883        /* We should treat it as a default route if prefix length is 0. */
1884        if (!prefixlen)
1885                cfg.fc_flags |= RTF_DEFAULT;
1886
1887        ip6_route_add(&cfg);
1888
1889        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1890}
1891#endif
1892
1893struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1894{
1895        struct rt6_info *rt;
1896        struct fib6_table *table;
1897
1898        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1899        if (!table)
1900                return NULL;
1901
1902        read_lock_bh(&table->tb6_lock);
1903        for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1904                if (dev == rt->dst.dev &&
1905                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1906                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1907                        break;
1908        }
1909        if (rt)
1910                dst_hold(&rt->dst);
1911        read_unlock_bh(&table->tb6_lock);
1912        return rt;
1913}
1914
1915struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1916                                     struct net_device *dev,
1917                                     unsigned int pref)
1918{
1919        struct fib6_config cfg = {
1920                .fc_table       = RT6_TABLE_DFLT,
1921                .fc_metric      = IP6_RT_PRIO_USER,
1922                .fc_ifindex     = dev->ifindex,
1923                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1924                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1925                .fc_nlinfo.portid = 0,
1926                .fc_nlinfo.nlh = NULL,
1927                .fc_nlinfo.nl_net = dev_net(dev),
1928        };
1929
1930        cfg.fc_gateway = *gwaddr;
1931
1932        ip6_route_add(&cfg);
1933
1934        return rt6_get_dflt_router(gwaddr, dev);
1935}
1936
1937void rt6_purge_dflt_routers(struct net *net)
1938{
1939        struct rt6_info *rt;
1940        struct fib6_table *table;
1941
1942        /* NOTE: Keep consistent with rt6_get_dflt_router */
1943        table = fib6_get_table(net, RT6_TABLE_DFLT);
1944        if (!table)
1945                return;
1946
1947restart:
1948        read_lock_bh(&table->tb6_lock);
1949        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1950                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1951                        dst_hold(&rt->dst);
1952                        read_unlock_bh(&table->tb6_lock);
1953                        ip6_del_rt(rt);
1954                        goto restart;
1955                }
1956        }
1957        read_unlock_bh(&table->tb6_lock);
1958}
1959
1960static void rtmsg_to_fib6_config(struct net *net,
1961                                 struct in6_rtmsg *rtmsg,
1962                                 struct fib6_config *cfg)
1963{
1964        memset(cfg, 0, sizeof(*cfg));
1965
1966        cfg->fc_table = RT6_TABLE_MAIN;
1967        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1968        cfg->fc_metric = rtmsg->rtmsg_metric;
1969        cfg->fc_expires = rtmsg->rtmsg_info;
1970        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1971        cfg->fc_src_len = rtmsg->rtmsg_src_len;
1972        cfg->fc_flags = rtmsg->rtmsg_flags;
1973
1974        cfg->fc_nlinfo.nl_net = net;
1975
1976        cfg->fc_dst = rtmsg->rtmsg_dst;
1977        cfg->fc_src = rtmsg->rtmsg_src;
1978        cfg->fc_gateway = rtmsg->rtmsg_gateway;
1979}
1980
1981int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1982{
1983        struct fib6_config cfg;
1984        struct in6_rtmsg rtmsg;
1985        int err;
1986
1987        switch(cmd) {
1988        case SIOCADDRT:         /* Add a route */
1989        case SIOCDELRT:         /* Delete a route */
1990                if (!capable(CAP_NET_ADMIN))
1991                        return -EPERM;
1992                err = copy_from_user(&rtmsg, arg,
1993                                     sizeof(struct in6_rtmsg));
1994                if (err)
1995                        return -EFAULT;
1996
1997                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1998
1999                rtnl_lock();
2000                switch (cmd) {
2001                case SIOCADDRT:
2002                        err = ip6_route_add(&cfg);
2003                        break;
2004                case SIOCDELRT:
2005                        err = ip6_route_del(&cfg);
2006                        break;
2007                default:
2008                        err = -EINVAL;
2009                }
2010                rtnl_unlock();
2011
2012                return err;
2013        }
2014
2015        return -EINVAL;
2016}
2017
2018/*
2019 *      Drop the packet on the floor
2020 */
2021
2022static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2023{
2024        int type;
2025        struct dst_entry *dst = skb_dst(skb);
2026        switch (ipstats_mib_noroutes) {
2027        case IPSTATS_MIB_INNOROUTES:
2028                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2029                if (type == IPV6_ADDR_ANY) {
2030                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2031                                      IPSTATS_MIB_INADDRERRORS);
2032                        break;
2033                }
2034                /* FALLTHROUGH */
2035        case IPSTATS_MIB_OUTNOROUTES:
2036                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2037                              ipstats_mib_noroutes);
2038                break;
2039        }
2040        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2041        kfree_skb(skb);
2042        return 0;
2043}
2044
2045static int ip6_pkt_discard(struct sk_buff *skb)
2046{
2047        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2048}
2049
2050static int ip6_pkt_discard_out(struct sk_buff *skb)
2051{
2052        skb->dev = skb_dst(skb)->dev;
2053        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2054}
2055
2056#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2057
2058static int ip6_pkt_prohibit(struct sk_buff *skb)
2059{
2060        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2061}
2062
2063static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2064{
2065        skb->dev = skb_dst(skb)->dev;
2066        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2067}
2068
2069#endif
2070
2071/*
2072 *      Allocate a dst for local (unicast / anycast) address.
2073 */
2074
2075struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2076                                    const struct in6_addr *addr,
2077                                    bool anycast)
2078{
2079        struct net *net = dev_net(idev->dev);
2080        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2081        int err;
2082
2083        if (!rt) {
2084                net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2085                return ERR_PTR(-ENOMEM);
2086        }
2087
2088        in6_dev_hold(idev);
2089
2090        rt->dst.flags |= DST_HOST;
2091        rt->dst.input = ip6_input;
2092        rt->dst.output = ip6_output;
2093        rt->rt6i_idev = idev;
2094
2095        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2096        if (anycast)
2097                rt->rt6i_flags |= RTF_ANYCAST;
2098        else
2099                rt->rt6i_flags |= RTF_LOCAL;
2100        err = rt6_bind_neighbour(rt, rt->dst.dev);
2101        if (err) {
2102                dst_free(&rt->dst);
2103                return ERR_PTR(err);
2104        }
2105
2106        rt->rt6i_dst.addr = *addr;
2107        rt->rt6i_dst.plen = 128;
2108        rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2109
2110        atomic_set(&rt->dst.__refcnt, 1);
2111
2112        return rt;
2113}
2114
2115int ip6_route_get_saddr(struct net *net,
2116                        struct rt6_info *rt,
2117                        const struct in6_addr *daddr,
2118                        unsigned int prefs,
2119                        struct in6_addr *saddr)
2120{
2121        struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2122        int err = 0;
2123        if (rt->rt6i_prefsrc.plen)
2124                *saddr = rt->rt6i_prefsrc.addr;
2125        else
2126                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2127                                         daddr, prefs, saddr);
2128        return err;
2129}
2130
2131/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL = any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address to drop from prefsrc entries */
};
2137
2138static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2139{
2140        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2141        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2142        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2143
2144        if (((void *)rt->dst.dev == dev || !dev) &&
2145            rt != net->ipv6.ip6_null_entry &&
2146            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2147                /* remove prefsrc entry */
2148                rt->rt6i_prefsrc.plen = 0;
2149        }
2150        return 0;
2151}
2152
2153void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2154{
2155        struct net *net = dev_net(ifp->idev->dev);
2156        struct arg_dev_net_ip adni = {
2157                .dev = ifp->idev->dev,
2158                .net = net,
2159                .addr = &ifp->addr,
2160        };
2161        fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2162}
2163
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL = any */
	struct net *net;		/* namespace whose null entry is spared */
};
2168
2169static int fib6_ifdown(struct rt6_info *rt, void *arg)
2170{
2171        const struct arg_dev_net *adn = arg;
2172        const struct net_device *dev = adn->dev;
2173
2174        if ((rt->dst.dev == dev || !dev) &&
2175            rt != adn->net->ipv6.ip6_null_entry)
2176                return -1;
2177
2178        return 0;
2179}
2180
2181void rt6_ifdown(struct net *net, struct net_device *dev)
2182{
2183        struct arg_dev_net adn = {
2184                .dev = dev,
2185                .net = net,
2186        };
2187
2188        fib6_clean_all(net, fib6_ifdown, 0, &adn);
2189        icmp6_clean_all(fib6_ifdown, &adn);
2190}
2191
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
2196
2197static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2198{
2199        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2200        struct inet6_dev *idev;
2201
2202        /* In IPv6 pmtu discovery is not optional,
2203           so that RTAX_MTU lock cannot disable it.
2204           We still use this lock to block changes
2205           caused by addrconf/ndisc.
2206        */
2207
2208        idev = __in6_dev_get(arg->dev);
2209        if (!idev)
2210                return 0;
2211
2212        /* For administrative MTU increase, there is no way to discover
2213           IPv6 PMTU increase, so PMTU increase should be updated here.
2214           Since RFC 1981 doesn't include administrative MTU increase
2215           update PMTU increase is a MUST. (i.e. jumbo frame)
2216         */
2217        /*
2218           If new MTU is less than route PMTU, this new MTU will be the
2219           lowest MTU in the path, update the route PMTU to reflect PMTU
2220           decreases; if new MTU is greater than route PMTU, and the
2221           old MTU is the lowest MTU in the path, update the route PMTU
2222           to reflect the increase. In this case if the other nodes' MTU
2223           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2224           PMTU discouvery.
2225         */
2226        if (rt->dst.dev == arg->dev &&
2227            !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2228            (dst_mtu(&rt->dst) >= arg->mtu ||
2229             (dst_mtu(&rt->dst) < arg->mtu &&
2230              dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2231                dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2232        }
2233        return 0;
2234}
2235
2236void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2237{
2238        struct rt6_mtu_change_arg arg = {
2239                .dev = dev,
2240                .mtu = mtu,
2241        };
2242
2243        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2244}
2245
2246static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2247        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2248        [RTA_OIF]               = { .type = NLA_U32 },
2249        [RTA_IIF]               = { .type = NLA_U32 },
2250        [RTA_PRIORITY]          = { .type = NLA_U32 },
2251        [RTA_METRICS]           = { .type = NLA_NESTED },
2252};
2253
2254static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2255                              struct fib6_config *cfg)
2256{
2257        struct rtmsg *rtm;
2258        struct nlattr *tb[RTA_MAX+1];
2259        int err;
2260
2261        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2262        if (err < 0)
2263                goto errout;
2264
2265        err = -EINVAL;
2266        rtm = nlmsg_data(nlh);
2267        memset(cfg, 0, sizeof(*cfg));
2268
2269        cfg->fc_table = rtm->rtm_table;
2270        cfg->fc_dst_len = rtm->rtm_dst_len;
2271        cfg->fc_src_len = rtm->rtm_src_len;
2272        cfg->fc_flags = RTF_UP;
2273        cfg->fc_protocol = rtm->rtm_protocol;
2274        cfg->fc_type = rtm->rtm_type;
2275
2276        if (rtm->rtm_type == RTN_UNREACHABLE ||
2277            rtm->rtm_type == RTN_BLACKHOLE ||
2278            rtm->rtm_type == RTN_PROHIBIT ||
2279            rtm->rtm_type == RTN_THROW)
2280                cfg->fc_flags |= RTF_REJECT;
2281
2282        if (rtm->rtm_type == RTN_LOCAL)
2283                cfg->fc_flags |= RTF_LOCAL;
2284
2285        cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2286        cfg->fc_nlinfo.nlh = nlh;
2287        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2288
2289        if (tb[RTA_GATEWAY]) {
2290                nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2291                cfg->fc_flags |= RTF_GATEWAY;
2292        }
2293
2294        if (tb[RTA_DST]) {
2295                int plen = (rtm->rtm_dst_len + 7) >> 3;
2296
2297                if (nla_len(tb[RTA_DST]) < plen)
2298                        goto errout;
2299
2300                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2301        }
2302
2303        if (tb[RTA_SRC]) {
2304                int plen = (rtm->rtm_src_len + 7) >> 3;
2305
2306                if (nla_len(tb[RTA_SRC]) < plen)
2307                        goto errout;
2308
2309                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2310        }
2311
2312        if (tb[RTA_PREFSRC])
2313                nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2314
2315        if (tb[RTA_OIF])
2316                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2317
2318        if (tb[RTA_PRIORITY])
2319                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2320
2321        if (tb[RTA_METRICS]) {
2322                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2323                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2324        }
2325
2326        if (tb[RTA_TABLE])
2327                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2328
2329        err = 0;
2330errout:
2331        return err;
2332}
2333
2334static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2335{
2336        struct fib6_config cfg;
2337        int err;
2338
2339        err = rtm_to_fib6_config(skb, nlh, &cfg);
2340        if (err < 0)
2341                return err;
2342
2343        return ip6_route_del(&cfg);
2344}
2345
2346static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2347{
2348        struct fib6_config cfg;
2349        int err;
2350
2351        err = rtm_to_fib6_config(skb, nlh, &cfg);
2352        if (err < 0)
2353                return err;
2354
2355        return ip6_route_add(&cfg);
2356}
2357
2358static inline size_t rt6_nlmsg_size(void)
2359{
2360        return NLMSG_ALIGN(sizeof(struct rtmsg))
2361               + nla_total_size(16) /* RTA_SRC */
2362               + nla_total_size(16) /* RTA_DST */
2363               + nla_total_size(16) /* RTA_GATEWAY */
2364               + nla_total_size(16) /* RTA_PREFSRC */
2365               + nla_total_size(4) /* RTA_TABLE */
2366               + nla_total_size(4) /* RTA_IIF */
2367               + nla_total_size(4) /* RTA_OIF */
2368               + nla_total_size(4) /* RTA_PRIORITY */
2369               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2370               + nla_total_size(sizeof(struct rta_cacheinfo));
2371}
2372
2373static int rt6_fill_node(struct net *net,
2374                         struct sk_buff *skb, struct rt6_info *rt,
2375                         struct in6_addr *dst, struct in6_addr *src,
2376                         int iif, int type, u32 portid, u32 seq,
2377                         int prefix, int nowait, unsigned int flags)
2378{
2379        struct rtmsg *rtm;
2380        struct nlmsghdr *nlh;
2381        long expires;
2382        u32 table;
2383        struct neighbour *n;
2384
2385        if (prefix) {   /* user wants prefix routes only */
2386                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2387                        /* success since this is not a prefix route */
2388                        return 1;
2389                }
2390        }
2391
2392        nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2393        if (!nlh)
2394                return -EMSGSIZE;
2395
2396        rtm = nlmsg_data(nlh);
2397        rtm->rtm_family = AF_INET6;
2398        rtm->rtm_dst_len = rt->rt6i_dst.plen;
2399        rtm->rtm_src_len = rt->rt6i_src.plen;
2400        rtm->rtm_tos = 0;
2401        if (rt->rt6i_table)
2402                table = rt->rt6i_table->tb6_id;
2403        else
2404                table = RT6_TABLE_UNSPEC;
2405        rtm->rtm_table = table;
2406        if (nla_put_u32(skb, RTA_TABLE, table))
2407                goto nla_put_failure;
2408        if (rt->rt6i_flags & RTF_REJECT) {
2409                switch (rt->dst.error) {
2410                case -EINVAL:
2411                        rtm->rtm_type = RTN_BLACKHOLE;
2412                        break;
2413                case -EACCES:
2414                        rtm->rtm_type = RTN_PROHIBIT;
2415                        break;
2416                case -EAGAIN:
2417                        rtm->rtm_type = RTN_THROW;
2418                        break;
2419                default:
2420                        rtm->rtm_type = RTN_UNREACHABLE;
2421                        break;
2422                }
2423        }
2424        else if (rt->rt6i_flags & RTF_LOCAL)
2425                rtm->rtm_type = RTN_LOCAL;
2426        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2427                rtm->rtm_type = RTN_LOCAL;
2428        else
2429                rtm->rtm_type = RTN_UNICAST;
2430        rtm->rtm_flags = 0;
2431        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2432        rtm->rtm_protocol = rt->rt6i_protocol;
2433        if (rt->rt6i_flags & RTF_DYNAMIC)
2434                rtm->rtm_protocol = RTPROT_REDIRECT;
2435        else if (rt->rt6i_flags & RTF_ADDRCONF) {
2436                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2437                        rtm->rtm_protocol = RTPROT_RA;
2438                else
2439                        rtm->rtm_protocol = RTPROT_KERNEL;
2440        }
2441
2442        if (rt->rt6i_flags & RTF_CACHE)
2443                rtm->rtm_flags |= RTM_F_CLONED;
2444
2445        if (dst) {
2446                if (nla_put(skb, RTA_DST, 16, dst))
2447                        goto nla_put_failure;
2448                rtm->rtm_dst_len = 128;
2449        } else if (rtm->rtm_dst_len)
2450                if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2451                        goto nla_put_failure;
2452#ifdef CONFIG_IPV6_SUBTREES
2453        if (src) {
2454                if (nla_put(skb, RTA_SRC, 16, src))
2455                        goto nla_put_failure;
2456                rtm->rtm_src_len = 128;
2457        } else if (rtm->rtm_src_len &&
2458                   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2459                goto nla_put_failure;
2460#endif
2461        if (iif) {
2462#ifdef CONFIG_IPV6_MROUTE
2463                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2464                        int err = ip6mr_get_route(net, skb, rtm, nowait);
2465                        if (err <= 0) {
2466                                if (!nowait) {
2467                                        if (err == 0)
2468                                                return 0;
2469                                        goto nla_put_failure;
2470                                } else {
2471                                        if (err == -EMSGSIZE)
2472                                                goto nla_put_failure;
2473                                }
2474                        }
2475                } else
2476#endif
2477                        if (nla_put_u32(skb, RTA_IIF, iif))
2478                                goto nla_put_failure;
2479        } else if (dst) {
2480                struct in6_addr saddr_buf;
2481                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2482                    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2483                        goto nla_put_failure;
2484        }
2485
2486        if (rt->rt6i_prefsrc.plen) {
2487                struct in6_addr saddr_buf;
2488                saddr_buf = rt->rt6i_prefsrc.addr;
2489                if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2490                        goto nla_put_failure;
2491        }
2492
2493        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2494                goto nla_put_failure;
2495
2496        n = rt->n;
2497        if (n) {
2498                if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2499                        goto nla_put_failure;
2500        }
2501
2502        if (rt->dst.dev &&
2503            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2504                goto nla_put_failure;
2505        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2506                goto nla_put_failure;
2507
2508        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2509
2510        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2511                goto nla_put_failure;
2512
2513        return nlmsg_end(skb, nlh);
2514
2515nla_put_failure:
2516        nlmsg_cancel(skb, nlh);
2517        return -EMSGSIZE;
2518}
2519
2520int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2521{
2522        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2523        int prefix;
2524
2525        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2526                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2527                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2528        } else
2529                prefix = 0;
2530
2531        return rt6_fill_node(arg->net,
2532                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2533                     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2534                     prefix, 0, NLM_F_MULTI);
2535}
2536
2537static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2538{
2539        struct net *net = sock_net(in_skb->sk);
2540        struct nlattr *tb[RTA_MAX+1];
2541        struct rt6_info *rt;
2542        struct sk_buff *skb;
2543        struct rtmsg *rtm;
2544        struct flowi6 fl6;
2545        int err, iif = 0, oif = 0;
2546
2547        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2548        if (err < 0)
2549                goto errout;
2550
2551        err = -EINVAL;
2552        memset(&fl6, 0, sizeof(fl6));
2553
2554        if (tb[RTA_SRC]) {
2555                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2556                        goto errout;
2557
2558                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2559        }
2560
2561        if (tb[RTA_DST]) {
2562                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2563                        goto errout;
2564
2565                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2566        }
2567
2568        if (tb[RTA_IIF])
2569                iif = nla_get_u32(tb[RTA_IIF]);
2570
2571        if (tb[RTA_OIF])
2572                oif = nla_get_u32(tb[RTA_OIF]);
2573
2574        if (iif) {
2575                struct net_device *dev;
2576                int flags = 0;
2577
2578                dev = __dev_get_by_index(net, iif);
2579                if (!dev) {
2580                        err = -ENODEV;
2581                        goto errout;
2582                }
2583
2584                fl6.flowi6_iif = iif;
2585
2586                if (!ipv6_addr_any(&fl6.saddr))
2587                        flags |= RT6_LOOKUP_F_HAS_SADDR;
2588
2589                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2590                                                               flags);
2591        } else {
2592                fl6.flowi6_oif = oif;
2593
2594                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2595        }
2596
2597        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2598        if (!skb) {
2599                dst_release(&rt->dst);
2600                err = -ENOBUFS;
2601                goto errout;
2602        }
2603
2604        /* Reserve room for dummy headers, this skb can pass
2605           through good chunk of routing engine.
2606         */
2607        skb_reset_mac_header(skb);
2608        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2609
2610        skb_dst_set(skb, &rt->dst);
2611
2612        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2613                            RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2614                            nlh->nlmsg_seq, 0, 0, 0);
2615        if (err < 0) {
2616                kfree_skb(skb);
2617                goto errout;
2618        }
2619
2620        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2621errout:
2622        return err;
2623}
2624
2625void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2626{
2627        struct sk_buff *skb;
2628        struct net *net = info->nl_net;
2629        u32 seq;
2630        int err;
2631
2632        err = -ENOBUFS;
2633        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2634
2635        skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2636        if (!skb)
2637                goto errout;
2638
2639        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2640                                event, info->portid, seq, 0, 0, 0);
2641        if (err < 0) {
2642                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2643                WARN_ON(err == -EMSGSIZE);
2644                kfree_skb(skb);
2645                goto errout;
2646        }
2647        rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2648                    info->nlh, gfp_any());
2649        return;
2650errout:
2651        if (err < 0)
2652                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2653}
2654
2655static int ip6_route_dev_notify(struct notifier_block *this,
2656                                unsigned long event, void *data)
2657{
2658        struct net_device *dev = (struct net_device *)data;
2659        struct net *net = dev_net(dev);
2660
2661        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2662                net->ipv6.ip6_null_entry->dst.dev = dev;
2663                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2664#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2665                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2666                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2667                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2668                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2669#endif
2670        }
2671
2672        return NOTIFY_OK;
2673}
2674
2675/*
2676 *      /proc
2677 */
2678
2679#ifdef CONFIG_PROC_FS
2680
/*
 * Buffer/offset bookkeeping record for /proc output.
 * NOTE(review): no user of this struct is visible in the nearby seq_file
 * based /proc code -- confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2689
2690static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2691{
2692        struct seq_file *m = p_arg;
2693        struct neighbour *n;
2694
2695        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2696
2697#ifdef CONFIG_IPV6_SUBTREES
2698        seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2699#else
2700        seq_puts(m, "00000000000000000000000000000000 00 ");
2701#endif
2702        n = rt->n;
2703        if (n) {
2704                seq_printf(m, "%pi6", n->primary_key);
2705        } else {
2706                seq_puts(m, "00000000000000000000000000000000");
2707        }
2708        seq_printf(m, " %08x %08x %08x %08x %8s\n",
2709                   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2710                   rt->dst.__use, rt->rt6i_flags,
2711                   rt->dst.dev ? rt->dst.dev->name : "");
2712        return 0;
2713}
2714
2715static int ipv6_route_show(struct seq_file *m, void *v)
2716{
2717        struct net *net = (struct net *)m->private;
2718        fib6_clean_all_ro(net, rt6_info_route, 0, m);
2719        return 0;
2720}
2721
2722static int ipv6_route_open(struct inode *inode, struct file *file)
2723{
2724        return single_open_net(inode, file, ipv6_route_show);
2725}
2726
2727static const struct file_operations ipv6_route_proc_fops = {
2728        .owner          = THIS_MODULE,
2729        .open           = ipv6_route_open,
2730        .read           = seq_read,
2731        .llseek         = seq_lseek,
2732        .release        = single_release_net,
2733};
2734
2735static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2736{
2737        struct net *net = (struct net *)seq->private;
2738        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2739                   net->ipv6.rt6_stats->fib_nodes,
2740                   net->ipv6.rt6_stats->fib_route_nodes,
2741                   net->ipv6.rt6_stats->fib_rt_alloc,
2742                   net->ipv6.rt6_stats->fib_rt_entries,
2743                   net->ipv6.rt6_stats->fib_rt_cache,
2744                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2745                   net->ipv6.rt6_stats->fib_discarded_routes);
2746
2747        return 0;
2748}
2749
2750static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2751{
2752        return single_open_net(inode, file, rt6_stats_seq_show);
2753}
2754
2755static const struct file_operations rt6_stats_seq_fops = {
2756        .owner   = THIS_MODULE,
2757        .open    = rt6_stats_seq_open,
2758        .read    = seq_read,
2759        .llseek  = seq_lseek,
2760        .release = single_release_net,
2761};
2762#endif  /* CONFIG_PROC_FS */
2763
2764#ifdef CONFIG_SYSCTL
2765
2766static
2767int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2768                              void __user *buffer, size_t *lenp, loff_t *ppos)
2769{
2770        struct net *net;
2771        int delay;
2772        if (!write)
2773                return -EINVAL;
2774
2775        net = (struct net *)ctl->extra1;
2776        delay = net->ipv6.sysctl.flush_delay;
2777        proc_dointvec(ctl, write, buffer, lenp, ppos);
2778        fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2779        return 0;
2780}
2781
2782ctl_table ipv6_route_table_template[] = {
2783        {
2784                .procname       =       "flush",
2785                .data           =       &init_net.ipv6.sysctl.flush_delay,
2786                .maxlen         =       sizeof(int),
2787                .mode           =       0200,
2788                .proc_handler   =       ipv6_sysctl_rtcache_flush
2789        },
2790        {
2791                .procname       =       "gc_thresh",
2792                .data           =       &ip6_dst_ops_template.gc_thresh,
2793                .maxlen         =       sizeof(int),
2794                .mode           =       0644,
2795                .proc_handler   =       proc_dointvec,
2796        },
2797        {
2798                .procname       =       "max_size",
2799                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2800                .maxlen         =       sizeof(int),
2801                .mode           =       0644,
2802                .proc_handler   =       proc_dointvec,
2803        },
2804        {
2805                .procname       =       "gc_min_interval",
2806                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2807                .maxlen         =       sizeof(int),
2808                .mode           =       0644,
2809                .proc_handler   =       proc_dointvec_jiffies,
2810        },
2811        {
2812                .procname       =       "gc_timeout",
2813                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2814                .maxlen         =       sizeof(int),
2815                .mode           =       0644,
2816                .proc_handler   =       proc_dointvec_jiffies,
2817        },
2818        {
2819                .procname       =       "gc_interval",
2820                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2821                .maxlen         =       sizeof(int),
2822                .mode           =       0644,
2823                .proc_handler   =       proc_dointvec_jiffies,
2824        },
2825        {
2826                .procname       =       "gc_elasticity",
2827                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2828                .maxlen         =       sizeof(int),
2829                .mode           =       0644,
2830                .proc_handler   =       proc_dointvec,
2831        },
2832        {
2833                .procname       =       "mtu_expires",
2834                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2835                .maxlen         =       sizeof(int),
2836                .mode           =       0644,
2837                .proc_handler   =       proc_dointvec_jiffies,
2838        },
2839        {
2840                .procname       =       "min_adv_mss",
2841                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2842                .maxlen         =       sizeof(int),
2843                .mode           =       0644,
2844                .proc_handler   =       proc_dointvec,
2845        },
2846        {
2847                .procname       =       "gc_min_interval_ms",
2848                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2849                .maxlen         =       sizeof(int),
2850                .mode           =       0644,
2851                .proc_handler   =       proc_dointvec_ms_jiffies,
2852        },
2853        { }
2854};
2855
2856struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2857{
2858        struct ctl_table *table;
2859
2860        table = kmemdup(ipv6_route_table_template,
2861                        sizeof(ipv6_route_table_template),
2862                        GFP_KERNEL);
2863
2864        if (table) {
2865                table[0].data = &net->ipv6.sysctl.flush_delay;
2866                table[0].extra1 = net;
2867                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2868                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2869                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2870                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2871                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2872                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2873                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2874                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2875                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2876        }
2877
2878        return table;
2879}
2880#endif
2881
2882static int __net_init ip6_route_net_init(struct net *net)
2883{
2884        int ret = -ENOMEM;
2885
2886        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2887               sizeof(net->ipv6.ip6_dst_ops));
2888
2889        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2890                goto out_ip6_dst_ops;
2891
2892        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2893                                           sizeof(*net->ipv6.ip6_null_entry),
2894                                           GFP_KERNEL);
2895        if (!net->ipv6.ip6_null_entry)
2896                goto out_ip6_dst_entries;
2897        net->ipv6.ip6_null_entry->dst.path =
2898                (struct dst_entry *)net->ipv6.ip6_null_entry;
2899        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2900        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2901                         ip6_template_metrics, true);
2902
2903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2905                                               sizeof(*net->ipv6.ip6_prohibit_entry),
2906                                               GFP_KERNEL);
2907        if (!net->ipv6.ip6_prohibit_entry)
2908                goto out_ip6_null_entry;
2909        net->ipv6.ip6_prohibit_entry->dst.path =
2910                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2911        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2912        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2913                         ip6_template_metrics, true);
2914
2915        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2916                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
2917                                               GFP_KERNEL);
2918        if (!net->ipv6.ip6_blk_hole_entry)
2919                goto out_ip6_prohibit_entry;
2920        net->ipv6.ip6_blk_hole_entry->dst.path =
2921                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2922        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2923        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2924                         ip6_template_metrics, true);
2925#endif
2926
2927        net->ipv6.sysctl.flush_delay = 0;
2928        net->ipv6.sysctl.ip6_rt_max_size = 4096;
2929        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2930        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2931        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2932        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2933        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2934        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2935
2936        net->ipv6.ip6_rt_gc_expire = 30*HZ;
2937
2938        ret = 0;
2939out:
2940        return ret;
2941
2942#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2943out_ip6_prohibit_entry:
2944        kfree(net->ipv6.ip6_prohibit_entry);
2945out_ip6_null_entry:
2946        kfree(net->ipv6.ip6_null_entry);
2947#endif
2948out_ip6_dst_entries:
2949        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2950out_ip6_dst_ops:
2951        goto out;
2952}
2953
2954static void __net_exit ip6_route_net_exit(struct net *net)
2955{
2956        kfree(net->ipv6.ip6_null_entry);
2957#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2958        kfree(net->ipv6.ip6_prohibit_entry);
2959        kfree(net->ipv6.ip6_blk_hole_entry);
2960#endif
2961        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2962}
2963
2964static int __net_init ip6_route_net_init_late(struct net *net)
2965{
2966#ifdef CONFIG_PROC_FS
2967        proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2968        proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2969#endif
2970        return 0;
2971}
2972
2973static void __net_exit ip6_route_net_exit_late(struct net *net)
2974{
2975#ifdef CONFIG_PROC_FS
2976        proc_net_remove(net, "ipv6_route");
2977        proc_net_remove(net, "rt6_stats");
2978#endif
2979}
2980
2981static struct pernet_operations ip6_route_net_ops = {
2982        .init = ip6_route_net_init,
2983        .exit = ip6_route_net_exit,
2984};
2985
2986static int __net_init ipv6_inetpeer_init(struct net *net)
2987{
2988        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2989
2990        if (!bp)
2991                return -ENOMEM;
2992        inet_peer_base_init(bp);
2993        net->ipv6.peers = bp;
2994        return 0;
2995}
2996
2997static void __net_exit ipv6_inetpeer_exit(struct net *net)
2998{
2999        struct inet_peer_base *bp = net->ipv6.peers;
3000
3001        net->ipv6.peers = NULL;
3002        inetpeer_invalidate_tree(bp);
3003        kfree(bp);
3004}
3005
3006static struct pernet_operations ipv6_inetpeer_ops = {
3007        .init   =       ipv6_inetpeer_init,
3008        .exit   =       ipv6_inetpeer_exit,
3009};
3010
3011static struct pernet_operations ip6_route_net_late_ops = {
3012        .init = ip6_route_net_init_late,
3013        .exit = ip6_route_net_exit_late,
3014};
3015
3016static struct notifier_block ip6_route_dev_notifier = {
3017        .notifier_call = ip6_route_dev_notify,
3018        .priority = 0,
3019};
3020
3021int __init ip6_route_init(void)
3022{
3023        int ret;
3024
3025        ret = -ENOMEM;
3026        ip6_dst_ops_template.kmem_cachep =
3027                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3028                                  SLAB_HWCACHE_ALIGN, NULL);
3029        if (!ip6_dst_ops_template.kmem_cachep)
3030                goto out;
3031
3032        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3033        if (ret)
3034                goto out_kmem_cache;
3035
3036        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3037        if (ret)
3038                goto out_dst_entries;
3039
3040        ret = register_pernet_subsys(&ip6_route_net_ops);
3041        if (ret)
3042                goto out_register_inetpeer;
3043
3044        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045
3046        /* Registering of the loopback is done before this portion of code,
3047         * the loopback reference in rt6_info will not be taken, do it
3048         * manually for init_net */
3049        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3050        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3053        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3055        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056  #endif
3057        ret = fib6_init();
3058        if (ret)
3059                goto out_register_subsys;
3060
3061        ret = xfrm6_init();
3062        if (ret)
3063                goto out_fib6_init;
3064
3065        ret = fib6_rules_init();
3066        if (ret)
3067                goto xfrm6_init;
3068
3069        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070        if (ret)
3071                goto fib6_rules_init;
3072
3073        ret = -ENOBUFS;
3074        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3077                goto out_register_late_subsys;
3078
3079        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3080        if (ret)
3081                goto out_register_late_subsys;
3082
3083out:
3084        return ret;
3085
3086out_register_late_subsys:
3087        unregister_pernet_subsys(&ip6_route_net_late_ops);
3088fib6_rules_init:
3089        fib6_rules_cleanup();
3090xfrm6_init:
3091        xfrm6_fini();
3092out_fib6_init:
3093        fib6_gc_cleanup();
3094out_register_subsys:
3095        unregister_pernet_subsys(&ip6_route_net_ops);
3096out_register_inetpeer:
3097        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3098out_dst_entries:
3099        dst_entries_destroy(&ip6_dst_blackhole_ops);
3100out_kmem_cache:
3101        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3102        goto out;
3103}
3104
3105void ip6_route_cleanup(void)
3106{
3107        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3108        unregister_pernet_subsys(&ip6_route_net_late_ops);
3109        fib6_rules_cleanup();
3110        xfrm6_fini();
3111        fib6_gc_cleanup();
3112        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3113        unregister_pernet_subsys(&ip6_route_net_ops);
3114        dst_entries_destroy(&ip6_dst_blackhole_ops);
3115        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3116}
3117
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.