linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60
  61#include <asm/uaccess.h>
  62
  63#ifdef CONFIG_SYSCTL
  64#include <linux/sysctl.h>
  65#endif
  66
   /*
    * Forward declarations for functions of this file that are used before
    * their definitions: the dst_ops callbacks referenced by
    * ip6_dst_ops_template / ip6_dst_blackhole_ops, the packet handlers
    * referenced by the static rt6_info templates, and ip6_rt_copy().
    */
   67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
   68                                    const struct in6_addr *dest);
   69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
   70static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
   71static unsigned int      ip6_mtu(const struct dst_entry *dst);
   72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
   73static void             ip6_dst_destroy(struct dst_entry *);
   74static void             ip6_dst_ifdown(struct dst_entry *,
   75                                       struct net_device *dev, int how);
   76static int               ip6_dst_gc(struct dst_ops *ops);
   77
   78static int              ip6_pkt_discard(struct sk_buff *skb);
   79static int              ip6_pkt_discard_out(struct sk_buff *skb);
   80static void             ip6_link_failure(struct sk_buff *skb);
   81static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
   82                                           struct sk_buff *skb, u32 mtu);
   83static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
   84                                        struct sk_buff *skb);
   85
   /* Route Information option helpers, only built with CONFIG_IPV6_ROUTE_INFO. */
   86#ifdef CONFIG_IPV6_ROUTE_INFO
   87static struct rt6_info *rt6_add_route_info(struct net *net,
   88                                           const struct in6_addr *prefix, int prefixlen,
   89                                           const struct in6_addr *gwaddr, int ifindex,
   90                                           unsigned int pref);
   91static struct rt6_info *rt6_get_route_info(struct net *net,
   92                                           const struct in6_addr *prefix, int prefixlen,
   93                                           const struct in6_addr *gwaddr, int ifindex);
   94#endif
  95
  96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
  97{
  98        struct rt6_info *rt = (struct rt6_info *) dst;
  99        struct inet_peer *peer;
 100        u32 *p = NULL;
 101
 102        if (!(rt->dst.flags & DST_HOST))
 103                return NULL;
 104
 105        peer = rt6_get_peer_create(rt);
 106        if (peer) {
 107                u32 *old_p = __DST_METRICS_PTR(old);
 108                unsigned long prev, new;
 109
 110                p = peer->metrics;
 111                if (inet_metrics_new(peer))
 112                        memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 113
 114                new = (unsigned long) p;
 115                prev = cmpxchg(&dst->_metrics, old, new);
 116
 117                if (prev != old) {
 118                        p = __DST_METRICS_PTR(prev);
 119                        if (prev & DST_METRICS_READ_ONLY)
 120                                p = NULL;
 121                }
 122        }
 123        return p;
 124}
 125
 126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 127                                             struct sk_buff *skb,
 128                                             const void *daddr)
 129{
 130        struct in6_addr *p = &rt->rt6i_gateway;
 131
 132        if (!ipv6_addr_any(p))
 133                return (const void *) p;
 134        else if (skb)
 135                return &ipv6_hdr(skb)->daddr;
 136        return daddr;
 137}
 138
 139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 140                                          struct sk_buff *skb,
 141                                          const void *daddr)
 142{
 143        struct rt6_info *rt = (struct rt6_info *) dst;
 144        struct neighbour *n;
 145
 146        daddr = choose_neigh_daddr(rt, skb, daddr);
 147        n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
 148        if (n)
 149                return n;
 150        return neigh_create(&nd_tbl, daddr, dst->dev);
 151}
 152
 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
 154{
 155        struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
 156        if (!n) {
 157                n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
 158                if (IS_ERR(n))
 159                        return PTR_ERR(n);
 160        }
 161        rt->n = n;
 162
 163        return 0;
 164}
 165
 166static struct dst_ops ip6_dst_ops_template = {
 167        .family                 =       AF_INET6,
 168        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 169        .gc                     =       ip6_dst_gc,
 170        .gc_thresh              =       1024,
 171        .check                  =       ip6_dst_check,
 172        .default_advmss         =       ip6_default_advmss,
 173        .mtu                    =       ip6_mtu,
 174        .cow_metrics            =       ipv6_cow_metrics,
 175        .destroy                =       ip6_dst_destroy,
 176        .ifdown                 =       ip6_dst_ifdown,
 177        .negative_advice        =       ip6_negative_advice,
 178        .link_failure           =       ip6_link_failure,
 179        .update_pmtu            =       ip6_rt_update_pmtu,
 180        .redirect               =       rt6_do_redirect,
 181        .local_out              =       __ip6_local_out,
 182        .neigh_lookup           =       ip6_neigh_lookup,
 183};
 184
 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 186{
 187        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 188
 189        return mtu ? : dst->dev->mtu;
 190}
 191
 192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 193                                         struct sk_buff *skb, u32 mtu)
 194{
 195}
 196
 197static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 198                                      struct sk_buff *skb)
 199{
 200}
 201
 202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 203                                         unsigned long old)
 204{
 205        return NULL;
 206}
 207
 208static struct dst_ops ip6_dst_blackhole_ops = {
 209        .family                 =       AF_INET6,
 210        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 211        .destroy                =       ip6_dst_destroy,
 212        .check                  =       ip6_dst_check,
 213        .mtu                    =       ip6_blackhole_mtu,
 214        .default_advmss         =       ip6_default_advmss,
 215        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 216        .redirect               =       ip6_rt_blackhole_redirect,
 217        .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
 218        .neigh_lookup           =       ip6_neigh_lookup,
 219};
 220
 221static const u32 ip6_template_metrics[RTAX_MAX] = {
 222        [RTAX_HOPLIMIT - 1] = 255,
 223};
 224
 225static struct rt6_info ip6_null_entry_template = {
 226        .dst = {
 227                .__refcnt       = ATOMIC_INIT(1),
 228                .__use          = 1,
 229                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 230                .error          = -ENETUNREACH,
 231                .input          = ip6_pkt_discard,
 232                .output         = ip6_pkt_discard_out,
 233        },
 234        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 235        .rt6i_protocol  = RTPROT_KERNEL,
 236        .rt6i_metric    = ~(u32) 0,
 237        .rt6i_ref       = ATOMIC_INIT(1),
 238};
 239
 240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 241
 242static int ip6_pkt_prohibit(struct sk_buff *skb);
 243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 244
 245static struct rt6_info ip6_prohibit_entry_template = {
 246        .dst = {
 247                .__refcnt       = ATOMIC_INIT(1),
 248                .__use          = 1,
 249                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 250                .error          = -EACCES,
 251                .input          = ip6_pkt_prohibit,
 252                .output         = ip6_pkt_prohibit_out,
 253        },
 254        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 255        .rt6i_protocol  = RTPROT_KERNEL,
 256        .rt6i_metric    = ~(u32) 0,
 257        .rt6i_ref       = ATOMIC_INIT(1),
 258};
 259
 260static struct rt6_info ip6_blk_hole_entry_template = {
 261        .dst = {
 262                .__refcnt       = ATOMIC_INIT(1),
 263                .__use          = 1,
 264                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 265                .error          = -EINVAL,
 266                .input          = dst_discard,
 267                .output         = dst_discard,
 268        },
 269        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 270        .rt6i_protocol  = RTPROT_KERNEL,
 271        .rt6i_metric    = ~(u32) 0,
 272        .rt6i_ref       = ATOMIC_INIT(1),
 273};
 274
 275#endif
 276
 277/* allocate dst with ip6_dst_ops */
 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 279                                             struct net_device *dev,
 280                                             int flags,
 281                                             struct fib6_table *table)
 282{
 283        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 284                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 285
 286        if (rt) {
 287                struct dst_entry *dst = &rt->dst;
 288
 289                memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 290                rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 291                rt->rt6i_genid = rt_genid(net);
 292        }
 293        return rt;
 294}
 295
 296static void ip6_dst_destroy(struct dst_entry *dst)
 297{
 298        struct rt6_info *rt = (struct rt6_info *)dst;
 299        struct inet6_dev *idev = rt->rt6i_idev;
 300
 301        if (rt->n)
 302                neigh_release(rt->n);
 303
 304        if (!(rt->dst.flags & DST_HOST))
 305                dst_destroy_metrics_generic(dst);
 306
 307        if (idev) {
 308                rt->rt6i_idev = NULL;
 309                in6_dev_put(idev);
 310        }
 311
 312        if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
 313                dst_release(dst->from);
 314
 315        if (rt6_has_peer(rt)) {
 316                struct inet_peer *peer = rt6_peer_ptr(rt);
 317                inet_putpeer(peer);
 318        }
 319}
 320
 321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 322
 323static u32 rt6_peer_genid(void)
 324{
 325        return atomic_read(&__rt6_peer_genid);
 326}
 327
 328void rt6_bind_peer(struct rt6_info *rt, int create)
 329{
 330        struct inet_peer_base *base;
 331        struct inet_peer *peer;
 332
 333        base = inetpeer_base_ptr(rt->_rt6i_peer);
 334        if (!base)
 335                return;
 336
 337        peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
 338        if (peer) {
 339                if (!rt6_set_peer(rt, peer))
 340                        inet_putpeer(peer);
 341                else
 342                        rt->rt6i_peer_genid = rt6_peer_genid();
 343        }
 344}
 345
 346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 347                           int how)
 348{
 349        struct rt6_info *rt = (struct rt6_info *)dst;
 350        struct inet6_dev *idev = rt->rt6i_idev;
 351        struct net_device *loopback_dev =
 352                dev_net(dev)->loopback_dev;
 353
 354        if (dev != loopback_dev) {
 355                if (idev && idev->dev == dev) {
 356                        struct inet6_dev *loopback_idev =
 357                                in6_dev_get(loopback_dev);
 358                        if (loopback_idev) {
 359                                rt->rt6i_idev = loopback_idev;
 360                                in6_dev_put(idev);
 361                        }
 362                }
 363                if (rt->n && rt->n->dev == dev) {
 364                        rt->n->dev = loopback_dev;
 365                        dev_hold(loopback_dev);
 366                        dev_put(dev);
 367                }
 368        }
 369}
 370
 371static bool rt6_check_expired(const struct rt6_info *rt)
 372{
 373        struct rt6_info *ort = NULL;
 374
 375        if (rt->rt6i_flags & RTF_EXPIRES) {
 376                if (time_after(jiffies, rt->dst.expires))
 377                        return true;
 378        } else if (rt->dst.from) {
 379                ort = (struct rt6_info *) rt->dst.from;
 380                return (ort->rt6i_flags & RTF_EXPIRES) &&
 381                        time_after(jiffies, ort->dst.expires);
 382        }
 383        return false;
 384}
 385
 386static bool rt6_need_strict(const struct in6_addr *daddr)
 387{
 388        return ipv6_addr_type(daddr) &
 389                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 390}
 391
 392/*
 393 *      Route lookup. Any table->tb6_lock is implied.
 394 */
 395
  /*
   * rt6_device_match - pick the route from a sibling list that fits the
   * requested outgoing interface or source address.
   * @net:   namespace, used for address checks and the null entry
   * @rt:    head of the route list (linked through dst.rt6_next)
   * @saddr: source address, consulted only when @oif is 0
   * @oif:   requested outgoing interface index, 0 for "any"
   * @flags: RT6_LOOKUP_F_* flags; RT6_LOOKUP_F_IFACE makes the match strict
   *
   * Returns @rt unchanged when neither @oif nor a specific @saddr constrain
   * the lookup.  With @oif set: the first route whose device has that
   * ifindex, else a remembered loopback-device route ("local"), else
   * net->ipv6.ip6_null_entry if RT6_LOOKUP_F_IFACE is set, else @rt.
   * With only @saddr set: the first route for which ipv6_chk_addr()
   * succeeds on its device, else @rt.
   */
  396static inline struct rt6_info *rt6_device_match(struct net *net,
  397                                                    struct rt6_info *rt,
  398                                                    const struct in6_addr *saddr,
  399                                                    int oif,
  400                                                    int flags)
  401{
  402        struct rt6_info *local = NULL;
  403        struct rt6_info *sprt;
  404
             /* Nothing to match against: keep the head of the list. */
  405        if (!oif && ipv6_addr_any(saddr))
  406                goto out;
  407
  408        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
  409                struct net_device *dev = sprt->dst.dev;
  410
  411                if (oif) {
  412                        if (dev->ifindex == oif)
  413                                return sprt;
                             /*
                              * Routes on a loopback device are fallback
                              * candidates; one whose rt6i_idev belongs to
                              * @oif always replaces the current candidate.
                              */
  414                        if (dev->flags & IFF_LOOPBACK) {
  415                                if (!sprt->rt6i_idev ||
  416                                    sprt->rt6i_idev->dev->ifindex != oif) {
  417                                        if (flags & RT6_LOOKUP_F_IFACE && oif)
  418                                                continue;
  419                                        if (local && (!oif ||
  420                                                      local->rt6i_idev->dev->ifindex == oif))
  421                                                continue;
  422                                }
  423                                local = sprt;
  424                        }
  425                } else {
  426                        if (ipv6_chk_addr(net, saddr, dev,
  427                                          flags & RT6_LOOKUP_F_IFACE))
  428                                return sprt;
  429                }
  430        }
  431
  432        if (oif) {
  433                if (local)
  434                        return local;
  435
  436                if (flags & RT6_LOOKUP_F_IFACE)
  437                        return net->ipv6.ip6_null_entry;
  438        }
  439out:
  440        return rt;
  441}
 442
 443#ifdef CONFIG_IPV6_ROUTER_PREF
 444static void rt6_probe(struct rt6_info *rt)
 445{
 446        struct neighbour *neigh;
 447        /*
 448         * Okay, this does not seem to be appropriate
 449         * for now, however, we need to check if it
 450         * is really so; aka Router Reachability Probing.
 451         *
 452         * Router Reachability Probe MUST be rate-limited
 453         * to no more than one per minute.
 454         */
 455        rcu_read_lock();
 456        neigh = rt ? rt->n : NULL;
 457        if (!neigh || (neigh->nud_state & NUD_VALID))
 458                goto out;
 459        read_lock_bh(&neigh->lock);
 460        if (!(neigh->nud_state & NUD_VALID) &&
 461            time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 462                struct in6_addr mcaddr;
 463                struct in6_addr *target;
 464
 465                neigh->updated = jiffies;
 466                read_unlock_bh(&neigh->lock);
 467
 468                target = (struct in6_addr *)&neigh->primary_key;
 469                addrconf_addr_solict_mult(target, &mcaddr);
 470                ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 471        } else {
 472                read_unlock_bh(&neigh->lock);
 473        }
 474out:
 475        rcu_read_unlock();
 476}
 477#else
 478static inline void rt6_probe(struct rt6_info *rt)
 479{
 480}
 481#endif
 482
 483/*
 484 * Default Router Selection (RFC 2461 6.3.6)
 485 */
 486static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 487{
 488        struct net_device *dev = rt->dst.dev;
 489        if (!oif || dev->ifindex == oif)
 490                return 2;
 491        if ((dev->flags & IFF_LOOPBACK) &&
 492            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 493                return 1;
 494        return 0;
 495}
 496
 497static inline int rt6_check_neigh(struct rt6_info *rt)
 498{
 499        struct neighbour *neigh;
 500        int m;
 501
 502        rcu_read_lock();
 503        neigh = rt->n;
 504        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 505            !(rt->rt6i_flags & RTF_GATEWAY))
 506                m = 1;
 507        else if (neigh) {
 508                read_lock_bh(&neigh->lock);
 509                if (neigh->nud_state & NUD_VALID)
 510                        m = 2;
 511#ifdef CONFIG_IPV6_ROUTER_PREF
 512                else if (neigh->nud_state & NUD_FAILED)
 513                        m = 0;
 514#endif
 515                else
 516                        m = 1;
 517                read_unlock_bh(&neigh->lock);
 518        } else
 519                m = 0;
 520        rcu_read_unlock();
 521        return m;
 522}
 523
 524static int rt6_score_route(struct rt6_info *rt, int oif,
 525                           int strict)
 526{
 527        int m, n;
 528
 529        m = rt6_check_dev(rt, oif);
 530        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 531                return -1;
 532#ifdef CONFIG_IPV6_ROUTER_PREF
 533        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 534#endif
 535        n = rt6_check_neigh(rt);
 536        if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 537                return -1;
 538        return m;
 539}
 540
 541static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 542                                   int *mpri, struct rt6_info *match)
 543{
 544        int m;
 545
 546        if (rt6_check_expired(rt))
 547                goto out;
 548
 549        m = rt6_score_route(rt, oif, strict);
 550        if (m < 0)
 551                goto out;
 552
 553        if (m > *mpri) {
 554                if (strict & RT6_LOOKUP_F_REACHABLE)
 555                        rt6_probe(match);
 556                *mpri = m;
 557                match = rt;
 558        } else if (strict & RT6_LOOKUP_F_REACHABLE) {
 559                rt6_probe(rt);
 560        }
 561
 562out:
 563        return match;
 564}
 565
 566static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 567                                     struct rt6_info *rr_head,
 568                                     u32 metric, int oif, int strict)
 569{
 570        struct rt6_info *rt, *match;
 571        int mpri = -1;
 572
 573        match = NULL;
 574        for (rt = rr_head; rt && rt->rt6i_metric == metric;
 575             rt = rt->dst.rt6_next)
 576                match = find_match(rt, oif, strict, &mpri, match);
 577        for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 578             rt = rt->dst.rt6_next)
 579                match = find_match(rt, oif, strict, &mpri, match);
 580
 581        return match;
 582}
 583
 584static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 585{
 586        struct rt6_info *match, *rt0;
 587        struct net *net;
 588
 589        rt0 = fn->rr_ptr;
 590        if (!rt0)
 591                fn->rr_ptr = rt0 = fn->leaf;
 592
 593        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 594
 595        if (!match &&
 596            (strict & RT6_LOOKUP_F_REACHABLE)) {
 597                struct rt6_info *next = rt0->dst.rt6_next;
 598
 599                /* no entries matched; do round-robin */
 600                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 601                        next = fn->leaf;
 602
 603                if (next != rt0)
 604                        fn->rr_ptr = next;
 605        }
 606
 607        net = dev_net(rt0->dst.dev);
 608        return match ? match : net->ipv6.ip6_null_entry;
 609}
 610
  611#ifdef CONFIG_IPV6_ROUTE_INFO
  /*
   * rt6_route_rcv - process a received Route Information option.
   * @dev:    device the option arrived on
   * @opt:    start of the option, interpreted as struct route_info
   * @len:    number of bytes available at @opt
   * @gwaddr: address of the advertising router, used as the route's gateway
   *
   * Validates the option, then deletes the matching route when the
   * advertised lifetime is zero, adds one when none exists and the lifetime
   * is non-zero, or refreshes preference and expiry of an existing one.
   *
   * Returns 0 on success (including "nothing to do") and -EINVAL when the
   * option is too short, its length / prefix_len fields are inconsistent,
   * or the preference is ICMPV6_ROUTER_PREF_INVALID.
   */
  612int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
  613                  const struct in6_addr *gwaddr)
  614{
  615        struct net *net = dev_net(dev);
  616        struct route_info *rinfo = (struct route_info *) opt;
  617        struct in6_addr prefix_buf, *prefix;
  618        unsigned int pref;
  619        unsigned long lifetime;
  620        struct rt6_info *rt;
  621
  622        if (len < sizeof(struct route_info)) {
  623                return -EINVAL;
  624        }
  625
  626        /* Sanity check for prefix_len and length */
             /*
              * length must be at most 3, and large enough for the prefix:
              * at least 2 for prefix_len > 64, at least 1 for prefix_len > 0.
              */
  627        if (rinfo->length > 3) {
  628                return -EINVAL;
  629        } else if (rinfo->prefix_len > 128) {
  630                return -EINVAL;
  631        } else if (rinfo->prefix_len > 64) {
  632                if (rinfo->length < 2) {
  633                        return -EINVAL;
  634                }
  635        } else if (rinfo->prefix_len > 0) {
  636                if (rinfo->length < 1) {
  637                        return -EINVAL;
  638                }
  639        }
  640
  641        pref = rinfo->route_pref;
  642        if (pref == ICMPV6_ROUTER_PREF_INVALID)
  643                return -EINVAL;
  644
  645        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
  646
             /*
              * With length == 3 the prefix in the option is used in place;
              * otherwise a copy truncated to prefix_len is built in
              * prefix_buf.
              */
  647        if (rinfo->length == 3)
  648                prefix = (struct in6_addr *)rinfo->prefix;
  649        else {
  650                /* this function is safe */
  651                ipv6_addr_prefix(&prefix_buf,
  652                                 (struct in6_addr *)rinfo->prefix,
  653                                 rinfo->prefix_len);
  654                prefix = &prefix_buf;
  655        }
  656
  657        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
  658                                dev->ifindex);
  659
             /* A zero lifetime withdraws an existing route. */
  660        if (rt && !lifetime) {
  661                ip6_del_rt(rt);
  662                rt = NULL;
  663        }
  664
  665        if (!rt && lifetime)
  666                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
  667                                        pref);
  668        else if (rt)
  669                rt->rt6i_flags = RTF_ROUTEINFO |
  670                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
  671
  672        if (rt) {
  673                if (!addrconf_finite_timeout(lifetime))
  674                        rt6_clean_expires(rt);
  675                else
  676                        rt6_set_expires(rt, jiffies + HZ * lifetime);
  677
                     /*
                      * NOTE(review): presumably drops the reference returned
                      * by the get/add helpers above - confirm there.
                      */
  678                dst_release(&rt->dst);
  679        }
  680        return 0;
  681}
  682#endif
 683
  /*
   * BACKTRACK - retry a failed lookup further up the FIB tree.
   *
   * Expands inside a lookup function and relies on that function's local
   * variables `rt` and `fn` and on its labels `restart` and `out`.
   *
   * When `rt` is the namespace's null entry, walk from `fn` towards the
   * root: at each step either descend into the parent's source-address
   * subtree (if it has one and we did not just come from it) via
   * fib6_lookup() with @saddr, or move to the parent itself.  Jumps to
   * `restart` as soon as the new node carries RTN_RTINFO, or to `out` when
   * the current node is a top-level root (RTN_TL_ROOT).  Does nothing when
   * `rt` is a real route.
   */
  684#define BACKTRACK(__net, saddr)                 \
  685do { \
  686        if (rt == __net->ipv6.ip6_null_entry) { \
  687                struct fib6_node *pn; \
  688                while (1) { \
  689                        if (fn->fn_flags & RTN_TL_ROOT) \
  690                                goto out; \
  691                        pn = fn->parent; \
  692                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
  693                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
  694                        else \
  695                                fn = pn; \
  696                        if (fn->fn_flags & RTN_RTINFO) \
  697                                goto restart; \
  698                } \
  699        } \
  700} while (0)
 701
  /*
   * ip6_pol_route_lookup - plain table lookup for a flow.
   * @net:   namespace
   * @table: FIB table to search; its tb6_lock is read-held for the lookup
   * @fl6:   flow (daddr, saddr and flowi6_oif are consulted)
   * @flags: RT6_LOOKUP_F_* flags passed to rt6_device_match()
   *
   * Finds the node for the flow, lets rt6_device_match() pick a route from
   * its leaf list and backtracks up the tree while the result is the null
   * entry.  The returned route has had dst_use() applied to it
   * (NOTE(review): presumably takes a reference and updates usage
   * statistics - confirm in net/dst.h); it may be the null entry.
   */
  702static struct rt6_info *ip6_pol_route_lookup(struct net *net,
  703                                             struct fib6_table *table,
  704                                             struct flowi6 *fl6, int flags)
  705{
  706        struct fib6_node *fn;
  707        struct rt6_info *rt;
  708
  709        read_lock_bh(&table->tb6_lock);
  710        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  /* BACKTRACK() jumps back here after moving fn to another node. */
  711restart:
  712        rt = fn->leaf;
  713        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
  714        BACKTRACK(net, &fl6->saddr);
  /* Reached by fall-through, or from BACKTRACK() at the top-level root. */
  715out:
  716        dst_use(&rt->dst, jiffies);
  717        read_unlock_bh(&table->tb6_lock);
  718        return rt;
  719
  720}
 721
 722struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 723                                    int flags)
 724{
 725        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 726}
 727EXPORT_SYMBOL_GPL(ip6_route_lookup);
 728
 729struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 730                            const struct in6_addr *saddr, int oif, int strict)
 731{
 732        struct flowi6 fl6 = {
 733                .flowi6_oif = oif,
 734                .daddr = *daddr,
 735        };
 736        struct dst_entry *dst;
 737        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 738
 739        if (saddr) {
 740                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 741                flags |= RT6_LOOKUP_F_HAS_SADDR;
 742        }
 743
 744        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 745        if (dst->error == 0)
 746                return (struct rt6_info *) dst;
 747
 748        dst_release(dst);
 749
 750        return NULL;
 751}
 752
 753EXPORT_SYMBOL(rt6_lookup);
 754
  755/* ip6_ins_rt is called with FREE table->tb6_lock.
  756   It takes a new route entry; if the addition fails for any reason,
  757   the route is freed. In any case, if the caller does not hold it,
  758   it may be destroyed.
  759 */
 760
 761static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 762{
 763        int err;
 764        struct fib6_table *table;
 765
 766        table = rt->rt6i_table;
 767        write_lock_bh(&table->tb6_lock);
 768        err = fib6_add(&table->tb6_root, rt, info);
 769        write_unlock_bh(&table->tb6_lock);
 770
 771        return err;
 772}
 773
 774int ip6_ins_rt(struct rt6_info *rt)
 775{
 776        struct nl_info info = {
 777                .nl_net = dev_net(rt->dst.dev),
 778        };
 779        return __ip6_ins_rt(rt, &info);
 780}
 781
  /*
   * rt6_alloc_cow - clone @ort into a cache entry for @daddr and bind a
   * neighbour to it.
   * @ort:   route to copy
   * @daddr: destination the clone is for
   * @saddr: source address; only used with CONFIG_IPV6_SUBTREES, may be NULL
   *
   * The clone gets RTF_CACHE.  If it has no gateway, @daddr itself becomes
   * the next hop, and RTF_ANYCAST is set when @ort is not a /128 but its
   * prefix address equals @daddr.
   *
   * If binding the neighbour fails and we are not in softirq context, one
   * GC pass is run with the GC sysctls temporarily tightened and the
   * bind is retried once.  On final failure a rate-limited warning is
   * logged and the clone is freed.
   *
   * Returns the clone, or NULL when ip6_rt_copy() fails or no neighbour
   * could be bound.
   */
  782static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
  783                                      const struct in6_addr *daddr,
  784                                      const struct in6_addr *saddr)
  785{
  786        struct rt6_info *rt;
  787
  788        /*
  789         *      Clone the route.
  790         */
  791
  792        rt = ip6_rt_copy(ort, daddr);
  793
  794        if (rt) {
                     /* One retry outside softirq context, none inside it. */
  795                int attempts = !in_softirq();
  796
  797                if (!(rt->rt6i_flags & RTF_GATEWAY)) {
  798                        if (ort->rt6i_dst.plen != 128 &&
  799                            ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
  800                                rt->rt6i_flags |= RTF_ANYCAST;
  801                        rt->rt6i_gateway = *daddr;
  802                }
  803
  804                rt->rt6i_flags |= RTF_CACHE;
  805
  806#ifdef CONFIG_IPV6_SUBTREES
  807                if (rt->rt6i_src.plen && saddr) {
  808                        rt->rt6i_src.addr = *saddr;
  809                        rt->rt6i_src.plen = 128;
  810                }
  811#endif
  812
  813        retry:
  814                if (rt6_bind_neighbour(rt, rt->dst.dev)) {
  815                        struct net *net = dev_net(rt->dst.dev);
  816                        int saved_rt_min_interval =
  817                                net->ipv6.sysctl.ip6_rt_gc_min_interval;
  818                        int saved_rt_elasticity =
  819                                net->ipv6.sysctl.ip6_rt_gc_elasticity;
  820
  821                        if (attempts-- > 0) {
                                     /*
                                      * Override the GC sysctls for this one
                                      * pass, then restore the saved values.
                                      */
  822                                net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
  823                                net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
  824
  825                                ip6_dst_gc(&net->ipv6.ip6_dst_ops);
  826
  827                                net->ipv6.sysctl.ip6_rt_gc_elasticity =
  828                                        saved_rt_elasticity;
  829                                net->ipv6.sysctl.ip6_rt_gc_min_interval =
  830                                        saved_rt_min_interval;
  831                                goto retry;
  832                        }
  833
  834                        net_warn_ratelimited("Neighbour table overflow\n");
  835                        dst_free(&rt->dst);
  836                        return NULL;
  837                }
  838        }
  839
  840        return rt;
  841}
 842
 843static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 844                                        const struct in6_addr *daddr)
 845{
 846        struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 847
 848        if (rt) {
 849                rt->rt6i_flags |= RTF_CACHE;
 850                rt->n = neigh_clone(ort->n);
 851        }
 852        return rt;
 853}
 854
  /*
   * ip6_pol_route - resolve a flow to a route, creating a cache entry when
   * needed.
   * @net:   namespace
   * @table: FIB table to search
   * @oif:   interface index used for route selection
   * @fl6:   flow being routed
   * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is taken over
   *
   * The first selection pass additionally requires a reachable next hop
   * unless forwarding is enabled in devconf_all; if that pass ends in the
   * null entry or an RTF_CACHE route, the lookup is repeated once without
   * the reachability requirement.
   *
   * A selected route that is neither the null entry nor a cache entry is
   * turned into a cache entry outside the table lock: rt6_alloc_cow() when
   * it has no neighbour and RTF_NONEXTHOP is clear, rt6_alloc_clone() when
   * it is not a host route; otherwise it is returned directly.  If the copy
   * cannot be allocated or inserted, the lookup restarts, giving up after
   * three attempts.
   *
   * Returns a route with a reference held (possibly the null entry); its
   * lastuse and __use fields are updated.
   */
  855static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
  856                                      struct flowi6 *fl6, int flags)
  857{
  858        struct fib6_node *fn;
  859        struct rt6_info *rt, *nrt;
  860        int strict = 0;
  861        int attempts = 3;
  862        int err;
  863        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
  864
  865        strict |= flags & RT6_LOOKUP_F_IFACE;
  866
  867relookup:
  868        read_lock_bh(&table->tb6_lock);
  869
  870restart_2:
  871        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  872
  873restart:
  874        rt = rt6_select(fn, oif, strict | reachable);
  875
  876        BACKTRACK(net, &fl6->saddr);
  877        if (rt == net->ipv6.ip6_null_entry ||
  878            rt->rt6i_flags & RTF_CACHE)
  879                goto out;
  880
             /* Pin the route, then drop the lock for allocation. */
  881        dst_hold(&rt->dst);
  882        read_unlock_bh(&table->tb6_lock);
  883
  884        if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
  885                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
  886        else if (!(rt->dst.flags & DST_HOST))
  887                nrt = rt6_alloc_clone(rt, &fl6->daddr);
  888        else
  889                goto out2;
  890
  891        dst_release(&rt->dst);
  892        rt = nrt ? : net->ipv6.ip6_null_entry;
  893
  894        dst_hold(&rt->dst);
  895        if (nrt) {
  896                err = ip6_ins_rt(nrt);
  897                if (!err)
  898                        goto out2;
  899        }
  900
  901        if (--attempts <= 0)
  902                goto out2;
  903
  904        /*
  905         * Race condition! In the gap, when table->tb6_lock was
  906         * released someone could insert this route.  Relookup.
  907         */
  908        dst_release(&rt->dst);
  909        goto relookup;
  910
  911out:
             /* Second chance: redo the lookup without requiring reachability. */
  912        if (reachable) {
  913                reachable = 0;
  914                goto restart_2;
  915        }
  916        dst_hold(&rt->dst);
  917        read_unlock_bh(&table->tb6_lock);
  918out2:
  919        rt->dst.lastuse = jiffies;
  920        rt->dst.__use++;
  921
  922        return rt;
  923}
 924
 925static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 926                                            struct flowi6 *fl6, int flags)
 927{
 928        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 929}
 930
 931static struct dst_entry *ip6_route_input_lookup(struct net *net,
 932                                                struct net_device *dev,
 933                                                struct flowi6 *fl6, int flags)
 934{
 935        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 936                flags |= RT6_LOOKUP_F_IFACE;
 937
 938        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 939}
 940
 941void ip6_route_input(struct sk_buff *skb)
 942{
 943        const struct ipv6hdr *iph = ipv6_hdr(skb);
 944        struct net *net = dev_net(skb->dev);
 945        int flags = RT6_LOOKUP_F_HAS_SADDR;
 946        struct flowi6 fl6 = {
 947                .flowi6_iif = skb->dev->ifindex,
 948                .daddr = iph->daddr,
 949                .saddr = iph->saddr,
 950                .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
 951                .flowi6_mark = skb->mark,
 952                .flowi6_proto = iph->nexthdr,
 953        };
 954
 955        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 956}
 957
 958static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 959                                             struct flowi6 *fl6, int flags)
 960{
 961        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 962}
 963
 964struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 965                                    struct flowi6 *fl6)
 966{
 967        int flags = 0;
 968
 969        fl6->flowi6_iif = net->loopback_dev->ifindex;
 970
 971        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 972                flags |= RT6_LOOKUP_F_IFACE;
 973
 974        if (!ipv6_addr_any(&fl6->saddr))
 975                flags |= RT6_LOOKUP_F_HAS_SADDR;
 976        else if (sk)
 977                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 978
 979        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 980}
 981
 982EXPORT_SYMBOL(ip6_route_output);
 983
 984struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 985{
 986        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 987        struct dst_entry *new = NULL;
 988
 989        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
 990        if (rt) {
 991                new = &rt->dst;
 992
 993                memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
 994                rt6_init_peer(rt, net->ipv6.peers);
 995
 996                new->__use = 1;
 997                new->input = dst_discard;
 998                new->output = dst_discard;
 999
1000                if (dst_metrics_read_only(&ort->dst))
1001                        new->_metrics = ort->dst._metrics;
1002                else
1003                        dst_copy_metrics(new, &ort->dst);
1004                rt->rt6i_idev = ort->rt6i_idev;
1005                if (rt->rt6i_idev)
1006                        in6_dev_hold(rt->rt6i_idev);
1007
1008                rt->rt6i_gateway = ort->rt6i_gateway;
1009                rt->rt6i_flags = ort->rt6i_flags;
1010                rt6_clean_expires(rt);
1011                rt->rt6i_metric = 0;
1012
1013                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1014#ifdef CONFIG_IPV6_SUBTREES
1015                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1016#endif
1017
1018                dst_free(new);
1019        }
1020
1021        dst_release(dst_orig);
1022        return new ? new : ERR_PTR(-ENOMEM);
1023}
1024
1025/*
1026 *      Destination cache support functions
1027 */
1028
1029static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030{
1031        struct rt6_info *rt;
1032
1033        rt = (struct rt6_info *) dst;
1034
1035        /* All IPV6 dsts are created with ->obsolete set to the value
1036         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1037         * into this function always.
1038         */
1039        if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1040                return NULL;
1041
1042        if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1043                if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1044                        if (!rt6_has_peer(rt))
1045                                rt6_bind_peer(rt, 0);
1046                        rt->rt6i_peer_genid = rt6_peer_genid();
1047                }
1048                return dst;
1049        }
1050        return NULL;
1051}
1052
1053static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1054{
1055        struct rt6_info *rt = (struct rt6_info *) dst;
1056
1057        if (rt) {
1058                if (rt->rt6i_flags & RTF_CACHE) {
1059                        if (rt6_check_expired(rt)) {
1060                                ip6_del_rt(rt);
1061                                dst = NULL;
1062                        }
1063                } else {
1064                        dst_release(dst);
1065                        dst = NULL;
1066                }
1067        }
1068        return dst;
1069}
1070
1071static void ip6_link_failure(struct sk_buff *skb)
1072{
1073        struct rt6_info *rt;
1074
1075        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1076
1077        rt = (struct rt6_info *) skb_dst(skb);
1078        if (rt) {
1079                if (rt->rt6i_flags & RTF_CACHE)
1080                        rt6_update_expires(rt, 0);
1081                else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1082                        rt->rt6i_node->fn_sernum = -1;
1083        }
1084}
1085
1086static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1087                               struct sk_buff *skb, u32 mtu)
1088{
1089        struct rt6_info *rt6 = (struct rt6_info*)dst;
1090
1091        dst_confirm(dst);
1092        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1093                struct net *net = dev_net(dst->dev);
1094
1095                rt6->rt6i_flags |= RTF_MODIFIED;
1096                if (mtu < IPV6_MIN_MTU) {
1097                        u32 features = dst_metric(dst, RTAX_FEATURES);
1098                        mtu = IPV6_MIN_MTU;
1099                        features |= RTAX_FEATURE_ALLFRAG;
1100                        dst_metric_set(dst, RTAX_FEATURES, features);
1101                }
1102                dst_metric_set(dst, RTAX_MTU, mtu);
1103                rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1104        }
1105}
1106
1107void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108                     int oif, u32 mark)
1109{
1110        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1111        struct dst_entry *dst;
1112        struct flowi6 fl6;
1113
1114        memset(&fl6, 0, sizeof(fl6));
1115        fl6.flowi6_oif = oif;
1116        fl6.flowi6_mark = mark;
1117        fl6.flowi6_flags = 0;
1118        fl6.daddr = iph->daddr;
1119        fl6.saddr = iph->saddr;
1120        fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1121
1122        dst = ip6_route_output(net, NULL, &fl6);
1123        if (!dst->error)
1124                ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1125        dst_release(dst);
1126}
1127EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1128
1129void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1130{
1131        ip6_update_pmtu(skb, sock_net(sk), mtu,
1132                        sk->sk_bound_dev_if, sk->sk_mark);
1133}
1134EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1135
1136void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1137{
1138        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1139        struct dst_entry *dst;
1140        struct flowi6 fl6;
1141
1142        memset(&fl6, 0, sizeof(fl6));
1143        fl6.flowi6_oif = oif;
1144        fl6.flowi6_mark = mark;
1145        fl6.flowi6_flags = 0;
1146        fl6.daddr = iph->daddr;
1147        fl6.saddr = iph->saddr;
1148        fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1149
1150        dst = ip6_route_output(net, NULL, &fl6);
1151        if (!dst->error)
1152                rt6_do_redirect(dst, NULL, skb);
1153        dst_release(dst);
1154}
1155EXPORT_SYMBOL_GPL(ip6_redirect);
1156
1157void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158{
1159        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1160}
1161EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1162
1163static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1164{
1165        struct net_device *dev = dst->dev;
1166        unsigned int mtu = dst_mtu(dst);
1167        struct net *net = dev_net(dev);
1168
1169        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1170
1171        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1172                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1173
1174        /*
1175         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1176         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1177         * IPV6_MAXPLEN is also valid and means: "any MSS,
1178         * rely only on pmtu discovery"
1179         */
1180        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1181                mtu = IPV6_MAXPLEN;
1182        return mtu;
1183}
1184
1185static unsigned int ip6_mtu(const struct dst_entry *dst)
1186{
1187        struct inet6_dev *idev;
1188        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1189
1190        if (mtu)
1191                return mtu;
1192
1193        mtu = IPV6_MIN_MTU;
1194
1195        rcu_read_lock();
1196        idev = __in6_dev_get(dst->dev);
1197        if (idev)
1198                mtu = idev->cnf.mtu6;
1199        rcu_read_unlock();
1200
1201        return mtu;
1202}
1203
1204static struct dst_entry *icmp6_dst_gc_list;
1205static DEFINE_SPINLOCK(icmp6_dst_lock);
1206
1207struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1208                                  struct neighbour *neigh,
1209                                  struct flowi6 *fl6)
1210{
1211        struct dst_entry *dst;
1212        struct rt6_info *rt;
1213        struct inet6_dev *idev = in6_dev_get(dev);
1214        struct net *net = dev_net(dev);
1215
1216        if (unlikely(!idev))
1217                return ERR_PTR(-ENODEV);
1218
1219        rt = ip6_dst_alloc(net, dev, 0, NULL);
1220        if (unlikely(!rt)) {
1221                in6_dev_put(idev);
1222                dst = ERR_PTR(-ENOMEM);
1223                goto out;
1224        }
1225
1226        if (neigh)
1227                neigh_hold(neigh);
1228        else {
1229                neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1230                if (IS_ERR(neigh)) {
1231                        in6_dev_put(idev);
1232                        dst_free(&rt->dst);
1233                        return ERR_CAST(neigh);
1234                }
1235        }
1236
1237        rt->dst.flags |= DST_HOST;
1238        rt->dst.output  = ip6_output;
1239        rt->n = neigh;
1240        atomic_set(&rt->dst.__refcnt, 1);
1241        rt->rt6i_dst.addr = fl6->daddr;
1242        rt->rt6i_dst.plen = 128;
1243        rt->rt6i_idev     = idev;
1244        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1245
1246        spin_lock_bh(&icmp6_dst_lock);
1247        rt->dst.next = icmp6_dst_gc_list;
1248        icmp6_dst_gc_list = &rt->dst;
1249        spin_unlock_bh(&icmp6_dst_lock);
1250
1251        fib6_force_start_gc(net);
1252
1253        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1254
1255out:
1256        return dst;
1257}
1258
1259int icmp6_dst_gc(void)
1260{
1261        struct dst_entry *dst, **pprev;
1262        int more = 0;
1263
1264        spin_lock_bh(&icmp6_dst_lock);
1265        pprev = &icmp6_dst_gc_list;
1266
1267        while ((dst = *pprev) != NULL) {
1268                if (!atomic_read(&dst->__refcnt)) {
1269                        *pprev = dst->next;
1270                        dst_free(dst);
1271                } else {
1272                        pprev = &dst->next;
1273                        ++more;
1274                }
1275        }
1276
1277        spin_unlock_bh(&icmp6_dst_lock);
1278
1279        return more;
1280}
1281
1282static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1283                            void *arg)
1284{
1285        struct dst_entry *dst, **pprev;
1286
1287        spin_lock_bh(&icmp6_dst_lock);
1288        pprev = &icmp6_dst_gc_list;
1289        while ((dst = *pprev) != NULL) {
1290                struct rt6_info *rt = (struct rt6_info *) dst;
1291                if (func(rt, arg)) {
1292                        *pprev = dst->next;
1293                        dst_free(dst);
1294                } else {
1295                        pprev = &dst->next;
1296                }
1297        }
1298        spin_unlock_bh(&icmp6_dst_lock);
1299}
1300
1301static int ip6_dst_gc(struct dst_ops *ops)
1302{
1303        unsigned long now = jiffies;
1304        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1305        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1306        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1307        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1308        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1309        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1310        int entries;
1311
1312        entries = dst_entries_get_fast(ops);
1313        if (time_after(rt_last_gc + rt_min_interval, now) &&
1314            entries <= rt_max_size)
1315                goto out;
1316
1317        net->ipv6.ip6_rt_gc_expire++;
1318        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1319        net->ipv6.ip6_rt_last_gc = now;
1320        entries = dst_entries_get_slow(ops);
1321        if (entries < ops->gc_thresh)
1322                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1323out:
1324        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1325        return entries > rt_max_size;
1326}
1327
1328/* Clean host part of a prefix. Not necessary in radix tree,
1329   but results in cleaner routing tables.
1330
1331   Remove it only when all the things will work!
1332 */
1333
1334int ip6_dst_hoplimit(struct dst_entry *dst)
1335{
1336        int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1337        if (hoplimit == 0) {
1338                struct net_device *dev = dst->dev;
1339                struct inet6_dev *idev;
1340
1341                rcu_read_lock();
1342                idev = __in6_dev_get(dev);
1343                if (idev)
1344                        hoplimit = idev->cnf.hop_limit;
1345                else
1346                        hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1347                rcu_read_unlock();
1348        }
1349        return hoplimit;
1350}
1351EXPORT_SYMBOL(ip6_dst_hoplimit);
1352
1353/*
1354 *
1355 */
1356
/*
 * ip6_route_add - build a route from a configuration request and insert it.
 * @cfg: route description.  Note that this function writes back into it:
 *       fc_metric and fc_protocol receive defaults when unset, and
 *       fc_nlinfo.nl_net is overwritten just before insertion.
 *
 * Returns the result of __ip6_ins_rt() when the route could be built, or a
 * negative errno set along the way (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM,
 * -EHOSTUNREACH, or the error from rt6_bind_neighbour()).
 *
 * Reference handling: device and inet6_dev references acquired here are
 * stored in the route on the success path and dropped at "out" on every
 * failure path, where a partially built route is also freed.
 *
 * Error-code convention: err is preset before a group of checks, and each
 * check then simply jumps to "out".
 */
1357int ip6_route_add(struct fib6_config *cfg)
1358{
1359        int err;
1360        struct net *net = cfg->fc_nlinfo.nl_net;
1361        struct rt6_info *rt = NULL;
1362        struct net_device *dev = NULL;
1363        struct inet6_dev *idev = NULL;
1364        struct fib6_table *table;
1365        int addr_type;
1366
            /* Prefix lengths cannot exceed the 128-bit address width. */
1367        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1368                return -EINVAL;
1369#ifndef CONFIG_IPV6_SUBTREES
            /* Source-specific routes are rejected without subtree support. */
1370        if (cfg->fc_src_len)
1371                return -EINVAL;
1372#endif
            /* Resolve an explicit interface; both references are held. */
1373        if (cfg->fc_ifindex) {
1374                err = -ENODEV;
1375                dev = dev_get_by_index(net, cfg->fc_ifindex);
1376                if (!dev)
1377                        goto out;
1378                idev = in6_dev_get(dev);
1379                if (!idev)
1380                        goto out;
1381        }
1382
1383        if (cfg->fc_metric == 0)
1384                cfg->fc_metric = IP6_RT_PRIO_USER;
1385
            /*
             * Pick the table.  A netlink request without NLM_F_CREATE first
             * tries an existing table; if there is none, a warning is
             * logged and the table is created anyway.  -ENOBUFS is the
             * error if no table can be obtained.
             */
1386        err = -ENOBUFS;
1387        if (cfg->fc_nlinfo.nlh &&
1388            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1389                table = fib6_get_table(net, cfg->fc_table);
1390                if (!table) {
1391                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1392                        table = fib6_new_table(net, cfg->fc_table);
1393                }
1394        } else {
1395                table = fib6_new_table(net, cfg->fc_table);
1396        }
1397
1398        if (!table)
1399                goto out;
1400
            /* The device is attached later, at install_route. */
1401        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1402
1403        if (!rt) {
1404                err = -ENOMEM;
1405                goto out;
1406        }
1407
            /* fc_expires is in clock_t units, relative to now. */
1408        if (cfg->fc_flags & RTF_EXPIRES)
1409                rt6_set_expires(rt, jiffies +
1410                                clock_t_to_jiffies(cfg->fc_expires));
1411        else
1412                rt6_clean_expires(rt);
1413
1414        if (cfg->fc_protocol == RTPROT_UNSPEC)
1415                cfg->fc_protocol = RTPROT_BOOT;
1416        rt->rt6i_protocol = cfg->fc_protocol;
1417
1418        addr_type = ipv6_addr_type(&cfg->fc_dst);
1419
            /* Input handler: multicast, local delivery, or forwarding. */
1420        if (addr_type & IPV6_ADDR_MULTICAST)
1421                rt->dst.input = ip6_mc_input;
1422        else if (cfg->fc_flags & RTF_LOCAL)
1423                rt->dst.input = ip6_input;
1424        else
1425                rt->dst.input = ip6_forward;
1426
1427        rt->dst.output = ip6_output;
1428
            /* Store the destination masked to its prefix length. */
1429        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1430        rt->rt6i_dst.plen = cfg->fc_dst_len;
1431        if (rt->rt6i_dst.plen == 128)
1432               rt->dst.flags |= DST_HOST;
1433
            /*
             * A non-host route with metric attributes gets its own zeroed
             * metrics array; it is filled in at install_route.
             */
1434        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1435                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1436                if (!metrics) {
1437                        err = -ENOMEM;
1438                        goto out;
1439                }
1440                dst_init_metrics(&rt->dst, metrics, 0);
1441        }
1442#ifdef CONFIG_IPV6_SUBTREES
1443        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1444        rt->rt6i_src.plen = cfg->fc_src_len;
1445#endif
1446
1447        rt->rt6i_metric = cfg->fc_metric;
1448
1449        /* We cannot add true routes via loopback here,
1450           they would result in kernel looping; promote them to reject routes
1451         */
1452        if ((cfg->fc_flags & RTF_REJECT) ||
1453            (dev && (dev->flags & IFF_LOOPBACK) &&
1454             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1455             !(cfg->fc_flags & RTF_LOCAL))) {
1456                /* hold loopback dev/idev if we haven't done so. */
1457                if (dev != net->loopback_dev) {
                            /* Swap any user-chosen device for loopback. */
1458                        if (dev) {
1459                                dev_put(dev);
1460                                in6_dev_put(idev);
1461                        }
1462                        dev = net->loopback_dev;
1463                        dev_hold(dev);
1464                        idev = in6_dev_get(dev);
1465                        if (!idev) {
1466                                err = -ENODEV;
1467                                goto out;
1468                        }
1469                }
                    /* Reject routes discard in both directions. */
1470                rt->dst.output = ip6_pkt_discard_out;
1471                rt->dst.input = ip6_pkt_discard;
1472                rt->dst.error = -ENETUNREACH;
                    /* The requested fc_flags are replaced, not merged. */
1473                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1474                goto install_route;
1475        }
1476
1477        if (cfg->fc_flags & RTF_GATEWAY) {
1478                const struct in6_addr *gw_addr;
1479                int gwa_type;
1480
1481                gw_addr = &cfg->fc_gateway;
1482                rt->rt6i_gateway = *gw_addr;
1483                gwa_type = ipv6_addr_type(gw_addr);
1484
1485                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1486                        struct rt6_info *grt;
1487
1488                        /* IPv6 strictly inhibits using not link-local
1489                           addresses as nexthop address.
1490                           Otherwise, router will not able to send redirects.
1491                           It is very good, but in some (rare!) circumstances
1492                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1493                           some exceptions. --ANK
1494                         */
1495                        err = -EINVAL;
1496                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1497                                goto out;
1498
                            /* The gateway itself must already be routable. */
1499                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1500
1501                        err = -EHOSTUNREACH;
1502                        if (!grt)
1503                                goto out;
1504                        if (dev) {
                                    /* It must be reached via the requested device. */
1505                                if (dev != grt->dst.dev) {
1506                                        dst_release(&grt->dst);
1507                                        goto out;
1508                                }
1509                        } else {
                                    /* No device given: inherit the gateway route's. */
1510                                dev = grt->dst.dev;
1511                                idev = grt->rt6i_idev;
1512                                dev_hold(dev);
1513                                in6_dev_hold(grt->rt6i_idev);
1514                        }
                            /*
                             * Accept only if the route to the gateway is not
                             * itself a gateway route; otherwise err stays
                             * -EHOSTUNREACH.
                             */
1515                        if (!(grt->rt6i_flags & RTF_GATEWAY))
1516                                err = 0;
1517                        dst_release(&grt->dst);
1518
1519                        if (err)
1520                                goto out;
1521                }
                    /* A gateway route needs a non-loopback device. */
1522                err = -EINVAL;
1523                if (!dev || (dev->flags & IFF_LOOPBACK))
1524                        goto out;
1525        }
1526
1527        err = -ENODEV;
1528        if (!dev)
1529                goto out;
1530
            /* A preferred source must pass ipv6_chk_addr() for this device. */
1531        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1532                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1533                        err = -EINVAL;
1534                        goto out;
1535                }
1536                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1537                rt->rt6i_prefsrc.plen = 128;
1538        } else
1539                rt->rt6i_prefsrc.plen = 0;
1540
1541        if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1542                err = rt6_bind_neighbour(rt, dev);
1543                if (err)
1544                        goto out;
1545        }
1546
1547        rt->rt6i_flags = cfg->fc_flags;
1548
1549install_route:
            /* Copy every typed metric attribute; type 0 is skipped. */
1550        if (cfg->fc_mx) {
1551                struct nlattr *nla;
1552                int remaining;
1553
1554                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1555                        int type = nla_type(nla);
1556
1557                        if (type) {
1558                                if (type > RTAX_MAX) {
1559                                        err = -EINVAL;
1560                                        goto out;
1561                                }
1562
1563                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1564                        }
1565                }
1566        }
1567
            /* The dev/idev references held here now belong to the route. */
1568        rt->dst.dev = dev;
1569        rt->rt6i_idev = idev;
1570        rt->rt6i_table = table;
1571
1572        cfg->fc_nlinfo.nl_net = dev_net(dev);
1573
1574        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1575
            /* Failure: drop whatever was acquired so far. */
1576out:
1577        if (dev)
1578                dev_put(dev);
1579        if (idev)
1580                in6_dev_put(idev);
1581        if (rt)
1582                dst_free(&rt->dst);
1583        return err;
1584}
1585
1586static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1587{
1588        int err;
1589        struct fib6_table *table;
1590        struct net *net = dev_net(rt->dst.dev);
1591
1592        if (rt == net->ipv6.ip6_null_entry)
1593                return -ENOENT;
1594
1595        table = rt->rt6i_table;
1596        write_lock_bh(&table->tb6_lock);
1597
1598        err = fib6_del(rt, info);
1599        dst_release(&rt->dst);
1600
1601        write_unlock_bh(&table->tb6_lock);
1602
1603        return err;
1604}
1605
1606int ip6_del_rt(struct rt6_info *rt)
1607{
1608        struct nl_info info = {
1609                .nl_net = dev_net(rt->dst.dev),
1610        };
1611        return __ip6_del_rt(rt, &info);
1612}
1613
1614static int ip6_route_del(struct fib6_config *cfg)
1615{
1616        struct fib6_table *table;
1617        struct fib6_node *fn;
1618        struct rt6_info *rt;
1619        int err = -ESRCH;
1620
1621        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1622        if (!table)
1623                return err;
1624
1625        read_lock_bh(&table->tb6_lock);
1626
1627        fn = fib6_locate(&table->tb6_root,
1628                         &cfg->fc_dst, cfg->fc_dst_len,
1629                         &cfg->fc_src, cfg->fc_src_len);
1630
1631        if (fn) {
1632                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1633                        if (cfg->fc_ifindex &&
1634                            (!rt->dst.dev ||
1635                             rt->dst.dev->ifindex != cfg->fc_ifindex))
1636                                continue;
1637                        if (cfg->fc_flags & RTF_GATEWAY &&
1638                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1639                                continue;
1640                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1641                                continue;
1642                        dst_hold(&rt->dst);
1643                        read_unlock_bh(&table->tb6_lock);
1644
1645                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1646                }
1647        }
1648        read_unlock_bh(&table->tb6_lock);
1649
1650        return err;
1651}
1652
1653static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1654{
1655        struct net *net = dev_net(skb->dev);
1656        struct netevent_redirect netevent;
1657        struct rt6_info *rt, *nrt = NULL;
1658        const struct in6_addr *target;
1659        struct ndisc_options ndopts;
1660        const struct in6_addr *dest;
1661        struct neighbour *old_neigh;
1662        struct inet6_dev *in6_dev;
1663        struct neighbour *neigh;
1664        struct icmp6hdr *icmph;
1665        int optlen, on_link;
1666        u8 *lladdr;
1667
1668        optlen = skb->tail - skb->transport_header;
1669        optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1670
1671        if (optlen < 0) {
1672                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1673                return;
1674        }
1675
1676        icmph = icmp6_hdr(skb);
1677        target = (const struct in6_addr *) (icmph + 1);
1678        dest = target + 1;
1679
1680        if (ipv6_addr_is_multicast(dest)) {
1681                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1682                return;
1683        }
1684
1685        on_link = 0;
1686        if (ipv6_addr_equal(dest, target)) {
1687                on_link = 1;
1688        } else if (ipv6_addr_type(target) !=
1689                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1690                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1691                return;
1692        }
1693
1694        in6_dev = __in6_dev_get(skb->dev);
1695        if (!in6_dev)
1696                return;
1697        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1698                return;
1699
1700        /* RFC2461 8.1:
1701         *      The IP source address of the Redirect MUST be the same as the current
1702         *      first-hop router for the specified ICMP Destination Address.
1703         */
1704
1705        if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1706                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1707                return;
1708        }
1709
1710        lladdr = NULL;
1711        if (ndopts.nd_opts_tgt_lladdr) {
1712                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1713                                             skb->dev);
1714                if (!lladdr) {
1715                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1716                        return;
1717                }
1718        }
1719
1720        rt = (struct rt6_info *) dst;
1721        if (rt == net->ipv6.ip6_null_entry) {
1722                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1723                return;
1724        }
1725
1726        /* Redirect received -> path was valid.
1727         * Look, redirects are sent only in response to data packets,
1728         * so that this nexthop apparently is reachable. --ANK
1729         */
1730        dst_confirm(&rt->dst);
1731
1732        neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1733        if (!neigh)
1734                return;
1735
1736        /* Duplicate redirect: silently ignore. */
1737        old_neigh = rt->n;
1738        if (neigh == old_neigh)
1739                goto out;
1740
1741        /*
1742         *      We have finally decided to accept it.
1743         */
1744
1745        neigh_update(neigh, lladdr, NUD_STALE,
1746                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1747                     NEIGH_UPDATE_F_OVERRIDE|
1748                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1749                                     NEIGH_UPDATE_F_ISROUTER))
1750                     );
1751
1752        nrt = ip6_rt_copy(rt, dest);
1753        if (!nrt)
1754                goto out;
1755
1756        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1757        if (on_link)
1758                nrt->rt6i_flags &= ~RTF_GATEWAY;
1759
1760        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1761        nrt->n = neigh_clone(neigh);
1762
1763        if (ip6_ins_rt(nrt))
1764                goto out;
1765
1766        netevent.old = &rt->dst;
1767        netevent.old_neigh = old_neigh;
1768        netevent.new = &nrt->dst;
1769        netevent.new_neigh = neigh;
1770        netevent.daddr = dest;
1771        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1772
1773        if (rt->rt6i_flags & RTF_CACHE) {
1774                rt = (struct rt6_info *) dst_clone(&rt->dst);
1775                ip6_del_rt(rt);
1776        }
1777
1778out:
1779        neigh_release(neigh);
1780}
1781
1782/*
1783 *      Misc support functions
1784 */
1785
1786static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1787                                    const struct in6_addr *dest)
1788{
1789        struct net *net = dev_net(ort->dst.dev);
1790        struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1791                                            ort->rt6i_table);
1792
1793        if (rt) {
1794                rt->dst.input = ort->dst.input;
1795                rt->dst.output = ort->dst.output;
1796                rt->dst.flags |= DST_HOST;
1797
1798                rt->rt6i_dst.addr = *dest;
1799                rt->rt6i_dst.plen = 128;
1800                dst_copy_metrics(&rt->dst, &ort->dst);
1801                rt->dst.error = ort->dst.error;
1802                rt->rt6i_idev = ort->rt6i_idev;
1803                if (rt->rt6i_idev)
1804                        in6_dev_hold(rt->rt6i_idev);
1805                rt->dst.lastuse = jiffies;
1806
1807                rt->rt6i_gateway = ort->rt6i_gateway;
1808                rt->rt6i_flags = ort->rt6i_flags;
1809                if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1810                    (RTF_DEFAULT | RTF_ADDRCONF))
1811                        rt6_set_from(rt, ort);
1812                else
1813                        rt6_clean_expires(rt);
1814                rt->rt6i_metric = 0;
1815
1816#ifdef CONFIG_IPV6_SUBTREES
1817                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1818#endif
1819                memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1820                rt->rt6i_table = ort->rt6i_table;
1821        }
1822        return rt;
1823}
1824
1825#ifdef CONFIG_IPV6_ROUTE_INFO
1826static struct rt6_info *rt6_get_route_info(struct net *net,
1827                                           const struct in6_addr *prefix, int prefixlen,
1828                                           const struct in6_addr *gwaddr, int ifindex)
1829{
1830        struct fib6_node *fn;
1831        struct rt6_info *rt = NULL;
1832        struct fib6_table *table;
1833
1834        table = fib6_get_table(net, RT6_TABLE_INFO);
1835        if (!table)
1836                return NULL;
1837
1838        write_lock_bh(&table->tb6_lock);
1839        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1840        if (!fn)
1841                goto out;
1842
1843        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1844                if (rt->dst.dev->ifindex != ifindex)
1845                        continue;
1846                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1847                        continue;
1848                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1849                        continue;
1850                dst_hold(&rt->dst);
1851                break;
1852        }
1853out:
1854        write_unlock_bh(&table->tb6_lock);
1855        return rt;
1856}
1857
1858static struct rt6_info *rt6_add_route_info(struct net *net,
1859                                           const struct in6_addr *prefix, int prefixlen,
1860                                           const struct in6_addr *gwaddr, int ifindex,
1861                                           unsigned int pref)
1862{
1863        struct fib6_config cfg = {
1864                .fc_table       = RT6_TABLE_INFO,
1865                .fc_metric      = IP6_RT_PRIO_USER,
1866                .fc_ifindex     = ifindex,
1867                .fc_dst_len     = prefixlen,
1868                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1869                                  RTF_UP | RTF_PREF(pref),
1870                .fc_nlinfo.pid = 0,
1871                .fc_nlinfo.nlh = NULL,
1872                .fc_nlinfo.nl_net = net,
1873        };
1874
1875        cfg.fc_dst = *prefix;
1876        cfg.fc_gateway = *gwaddr;
1877
1878        /* We should treat it as a default route if prefix length is 0. */
1879        if (!prefixlen)
1880                cfg.fc_flags |= RTF_DEFAULT;
1881
1882        ip6_route_add(&cfg);
1883
1884        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1885}
1886#endif
1887
1888struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1889{
1890        struct rt6_info *rt;
1891        struct fib6_table *table;
1892
1893        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1894        if (!table)
1895                return NULL;
1896
1897        write_lock_bh(&table->tb6_lock);
1898        for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1899                if (dev == rt->dst.dev &&
1900                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1901                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1902                        break;
1903        }
1904        if (rt)
1905                dst_hold(&rt->dst);
1906        write_unlock_bh(&table->tb6_lock);
1907        return rt;
1908}
1909
1910struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1911                                     struct net_device *dev,
1912                                     unsigned int pref)
1913{
1914        struct fib6_config cfg = {
1915                .fc_table       = RT6_TABLE_DFLT,
1916                .fc_metric      = IP6_RT_PRIO_USER,
1917                .fc_ifindex     = dev->ifindex,
1918                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1919                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1920                .fc_nlinfo.pid = 0,
1921                .fc_nlinfo.nlh = NULL,
1922                .fc_nlinfo.nl_net = dev_net(dev),
1923        };
1924
1925        cfg.fc_gateway = *gwaddr;
1926
1927        ip6_route_add(&cfg);
1928
1929        return rt6_get_dflt_router(gwaddr, dev);
1930}
1931
1932void rt6_purge_dflt_routers(struct net *net)
1933{
1934        struct rt6_info *rt;
1935        struct fib6_table *table;
1936
1937        /* NOTE: Keep consistent with rt6_get_dflt_router */
1938        table = fib6_get_table(net, RT6_TABLE_DFLT);
1939        if (!table)
1940                return;
1941
1942restart:
1943        read_lock_bh(&table->tb6_lock);
1944        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1945                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1946                        dst_hold(&rt->dst);
1947                        read_unlock_bh(&table->tb6_lock);
1948                        ip6_del_rt(rt);
1949                        goto restart;
1950                }
1951        }
1952        read_unlock_bh(&table->tb6_lock);
1953}
1954
1955static void rtmsg_to_fib6_config(struct net *net,
1956                                 struct in6_rtmsg *rtmsg,
1957                                 struct fib6_config *cfg)
1958{
1959        memset(cfg, 0, sizeof(*cfg));
1960
1961        cfg->fc_table = RT6_TABLE_MAIN;
1962        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1963        cfg->fc_metric = rtmsg->rtmsg_metric;
1964        cfg->fc_expires = rtmsg->rtmsg_info;
1965        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1966        cfg->fc_src_len = rtmsg->rtmsg_src_len;
1967        cfg->fc_flags = rtmsg->rtmsg_flags;
1968
1969        cfg->fc_nlinfo.nl_net = net;
1970
1971        cfg->fc_dst = rtmsg->rtmsg_dst;
1972        cfg->fc_src = rtmsg->rtmsg_src;
1973        cfg->fc_gateway = rtmsg->rtmsg_gateway;
1974}
1975
1976int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1977{
1978        struct fib6_config cfg;
1979        struct in6_rtmsg rtmsg;
1980        int err;
1981
1982        switch(cmd) {
1983        case SIOCADDRT:         /* Add a route */
1984        case SIOCDELRT:         /* Delete a route */
1985                if (!capable(CAP_NET_ADMIN))
1986                        return -EPERM;
1987                err = copy_from_user(&rtmsg, arg,
1988                                     sizeof(struct in6_rtmsg));
1989                if (err)
1990                        return -EFAULT;
1991
1992                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1993
1994                rtnl_lock();
1995                switch (cmd) {
1996                case SIOCADDRT:
1997                        err = ip6_route_add(&cfg);
1998                        break;
1999                case SIOCDELRT:
2000                        err = ip6_route_del(&cfg);
2001                        break;
2002                default:
2003                        err = -EINVAL;
2004                }
2005                rtnl_unlock();
2006
2007                return err;
2008        }
2009
2010        return -EINVAL;
2011}
2012
2013/*
2014 *      Drop the packet on the floor
2015 */
2016
2017static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2018{
2019        int type;
2020        struct dst_entry *dst = skb_dst(skb);
2021        switch (ipstats_mib_noroutes) {
2022        case IPSTATS_MIB_INNOROUTES:
2023                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2024                if (type == IPV6_ADDR_ANY) {
2025                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2026                                      IPSTATS_MIB_INADDRERRORS);
2027                        break;
2028                }
2029                /* FALLTHROUGH */
2030        case IPSTATS_MIB_OUTNOROUTES:
2031                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2032                              ipstats_mib_noroutes);
2033                break;
2034        }
2035        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2036        kfree_skb(skb);
2037        return 0;
2038}
2039
2040static int ip6_pkt_discard(struct sk_buff *skb)
2041{
2042        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2043}
2044
2045static int ip6_pkt_discard_out(struct sk_buff *skb)
2046{
2047        skb->dev = skb_dst(skb)->dev;
2048        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2049}
2050
2051#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2052
2053static int ip6_pkt_prohibit(struct sk_buff *skb)
2054{
2055        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2056}
2057
2058static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2059{
2060        skb->dev = skb_dst(skb)->dev;
2061        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2062}
2063
2064#endif
2065
2066/*
2067 *      Allocate a dst for local (unicast / anycast) address.
2068 */
2069
2070struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2071                                    const struct in6_addr *addr,
2072                                    bool anycast)
2073{
2074        struct net *net = dev_net(idev->dev);
2075        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2076        int err;
2077
2078        if (!rt) {
2079                net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2080                return ERR_PTR(-ENOMEM);
2081        }
2082
2083        in6_dev_hold(idev);
2084
2085        rt->dst.flags |= DST_HOST;
2086        rt->dst.input = ip6_input;
2087        rt->dst.output = ip6_output;
2088        rt->rt6i_idev = idev;
2089
2090        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2091        if (anycast)
2092                rt->rt6i_flags |= RTF_ANYCAST;
2093        else
2094                rt->rt6i_flags |= RTF_LOCAL;
2095        err = rt6_bind_neighbour(rt, rt->dst.dev);
2096        if (err) {
2097                dst_free(&rt->dst);
2098                return ERR_PTR(err);
2099        }
2100
2101        rt->rt6i_dst.addr = *addr;
2102        rt->rt6i_dst.plen = 128;
2103        rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2104
2105        atomic_set(&rt->dst.__refcnt, 1);
2106
2107        return rt;
2108}
2109
2110int ip6_route_get_saddr(struct net *net,
2111                        struct rt6_info *rt,
2112                        const struct in6_addr *daddr,
2113                        unsigned int prefs,
2114                        struct in6_addr *saddr)
2115{
2116        struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2117        int err = 0;
2118        if (rt->rt6i_prefsrc.plen)
2119                *saddr = rt->rt6i_prefsrc.addr;
2120        else
2121                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2122                                         daddr, prefs, saddr);
2123        return err;
2124}
2125
2126/* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all() by rt6_remove_prefsrc().
 */
struct arg_dev_net_ip {
        struct net_device *dev;         /* device filter; NULL matches any device */
        struct net *net;                /* namespace whose ip6_null_entry is skipped */
        struct in6_addr *addr;          /* address compared against each route's prefsrc */
};
2132
2133static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2134{
2135        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2136        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2137        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2138
2139        if (((void *)rt->dst.dev == dev || !dev) &&
2140            rt != net->ipv6.ip6_null_entry &&
2141            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2142                /* remove prefsrc entry */
2143                rt->rt6i_prefsrc.plen = 0;
2144        }
2145        return 0;
2146}
2147
2148void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2149{
2150        struct net *net = dev_net(ifp->idev->dev);
2151        struct arg_dev_net_ip adni = {
2152                .dev = ifp->idev->dev,
2153                .net = net,
2154                .addr = &ifp->addr,
2155        };
2156        fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2157}
2158
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
        struct net_device *dev;         /* device filter; NULL matches any device */
        struct net *net;                /* namespace whose ip6_null_entry is skipped */
};
2163
2164static int fib6_ifdown(struct rt6_info *rt, void *arg)
2165{
2166        const struct arg_dev_net *adn = arg;
2167        const struct net_device *dev = adn->dev;
2168
2169        if ((rt->dst.dev == dev || !dev) &&
2170            rt != adn->net->ipv6.ip6_null_entry)
2171                return -1;
2172
2173        return 0;
2174}
2175
2176void rt6_ifdown(struct net *net, struct net_device *dev)
2177{
2178        struct arg_dev_net adn = {
2179                .dev = dev,
2180                .net = net,
2181        };
2182
2183        fib6_clean_all(net, fib6_ifdown, 0, &adn);
2184        icmp6_clean_all(fib6_ifdown, &adn);
2185}
2186
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU value */
};
2191
2192static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2193{
2194        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2195        struct inet6_dev *idev;
2196
2197        /* In IPv6 pmtu discovery is not optional,
2198           so that RTAX_MTU lock cannot disable it.
2199           We still use this lock to block changes
2200           caused by addrconf/ndisc.
2201        */
2202
2203        idev = __in6_dev_get(arg->dev);
2204        if (!idev)
2205                return 0;
2206
2207        /* For administrative MTU increase, there is no way to discover
2208           IPv6 PMTU increase, so PMTU increase should be updated here.
2209           Since RFC 1981 doesn't include administrative MTU increase
2210           update PMTU increase is a MUST. (i.e. jumbo frame)
2211         */
2212        /*
2213           If new MTU is less than route PMTU, this new MTU will be the
2214           lowest MTU in the path, update the route PMTU to reflect PMTU
2215           decreases; if new MTU is greater than route PMTU, and the
2216           old MTU is the lowest MTU in the path, update the route PMTU
2217           to reflect the increase. In this case if the other nodes' MTU
2218           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2219           PMTU discouvery.
2220         */
2221        if (rt->dst.dev == arg->dev &&
2222            !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2223            (dst_mtu(&rt->dst) >= arg->mtu ||
2224             (dst_mtu(&rt->dst) < arg->mtu &&
2225              dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2226                dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2227        }
2228        return 0;
2229}
2230
2231void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2232{
2233        struct rt6_mtu_change_arg arg = {
2234                .dev = dev,
2235                .mtu = mtu,
2236        };
2237
2238        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2239}
2240
2241static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2242        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2243        [RTA_OIF]               = { .type = NLA_U32 },
2244        [RTA_IIF]               = { .type = NLA_U32 },
2245        [RTA_PRIORITY]          = { .type = NLA_U32 },
2246        [RTA_METRICS]           = { .type = NLA_NESTED },
2247};
2248
2249static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2250                              struct fib6_config *cfg)
2251{
2252        struct rtmsg *rtm;
2253        struct nlattr *tb[RTA_MAX+1];
2254        int err;
2255
2256        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2257        if (err < 0)
2258                goto errout;
2259
2260        err = -EINVAL;
2261        rtm = nlmsg_data(nlh);
2262        memset(cfg, 0, sizeof(*cfg));
2263
2264        cfg->fc_table = rtm->rtm_table;
2265        cfg->fc_dst_len = rtm->rtm_dst_len;
2266        cfg->fc_src_len = rtm->rtm_src_len;
2267        cfg->fc_flags = RTF_UP;
2268        cfg->fc_protocol = rtm->rtm_protocol;
2269
2270        if (rtm->rtm_type == RTN_UNREACHABLE)
2271                cfg->fc_flags |= RTF_REJECT;
2272
2273        if (rtm->rtm_type == RTN_LOCAL)
2274                cfg->fc_flags |= RTF_LOCAL;
2275
2276        cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2277        cfg->fc_nlinfo.nlh = nlh;
2278        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2279
2280        if (tb[RTA_GATEWAY]) {
2281                nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2282                cfg->fc_flags |= RTF_GATEWAY;
2283        }
2284
2285        if (tb[RTA_DST]) {
2286                int plen = (rtm->rtm_dst_len + 7) >> 3;
2287
2288                if (nla_len(tb[RTA_DST]) < plen)
2289                        goto errout;
2290
2291                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2292        }
2293
2294        if (tb[RTA_SRC]) {
2295                int plen = (rtm->rtm_src_len + 7) >> 3;
2296
2297                if (nla_len(tb[RTA_SRC]) < plen)
2298                        goto errout;
2299
2300                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2301        }
2302
2303        if (tb[RTA_PREFSRC])
2304                nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2305
2306        if (tb[RTA_OIF])
2307                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2308
2309        if (tb[RTA_PRIORITY])
2310                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2311
2312        if (tb[RTA_METRICS]) {
2313                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2314                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2315        }
2316
2317        if (tb[RTA_TABLE])
2318                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2319
2320        err = 0;
2321errout:
2322        return err;
2323}
2324
2325static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2326{
2327        struct fib6_config cfg;
2328        int err;
2329
2330        err = rtm_to_fib6_config(skb, nlh, &cfg);
2331        if (err < 0)
2332                return err;
2333
2334        return ip6_route_del(&cfg);
2335}
2336
2337static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2338{
2339        struct fib6_config cfg;
2340        int err;
2341
2342        err = rtm_to_fib6_config(skb, nlh, &cfg);
2343        if (err < 0)
2344                return err;
2345
2346        return ip6_route_add(&cfg);
2347}
2348
2349static inline size_t rt6_nlmsg_size(void)
2350{
2351        return NLMSG_ALIGN(sizeof(struct rtmsg))
2352               + nla_total_size(16) /* RTA_SRC */
2353               + nla_total_size(16) /* RTA_DST */
2354               + nla_total_size(16) /* RTA_GATEWAY */
2355               + nla_total_size(16) /* RTA_PREFSRC */
2356               + nla_total_size(4) /* RTA_TABLE */
2357               + nla_total_size(4) /* RTA_IIF */
2358               + nla_total_size(4) /* RTA_OIF */
2359               + nla_total_size(4) /* RTA_PRIORITY */
2360               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2361               + nla_total_size(sizeof(struct rta_cacheinfo));
2362}
2363
/*
 *      Serialise route @rt into @skb as one rtnetlink route message.
 *
 *      @net:    namespace, used for the multicast-route and source-address
 *               lookups
 *      @skb:    message buffer being filled
 *      @rt:     route to describe
 *      @dst:    if non-NULL, reported as a /128 RTA_DST instead of the
 *               route's own prefix
 *      @src:    if non-NULL (and subtrees are enabled), reported as a /128
 *               RTA_SRC instead of the route's own source prefix
 *      @iif:    input interface index, or 0
 *      @type:   netlink message type
 *      @pid:    netlink port id for the message header
 *      @seq:    netlink sequence number for the message header
 *      @prefix: non-zero to emit only routes flagged RTF_PREFIX_RT
 *      @nowait: passed through to ip6mr_get_route()
 *      @flags:  netlink message flags
 *
 *      Returns the value of nlmsg_end() on success, 1 when @prefix is set
 *      and @rt is not a prefix route (nothing is written), 0 in the
 *      multicast case when ip6mr_get_route() returned 0 and @nowait is
 *      clear, or -EMSGSIZE when the message did not fit (the partial
 *      message is cancelled).
 */
static int rt6_fill_node(struct net *net,
                         struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
                         int prefix, int nowait, unsigned int flags)
{
        struct rtmsg *rtm;
        struct nlmsghdr *nlh;
        long expires;
        u32 table;
        struct neighbour *n;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
        if (!nlh)
                return -EMSGSIZE;

        /* Fixed rtmsg header. */
        rtm = nlmsg_data(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        if (rt->rt6i_table)
                table = rt->rt6i_table->tb6_id;
        else
                table = RT6_TABLE_UNSPEC;
        /* The table id goes both into the header field and into RTA_TABLE. */
        rtm->rtm_table = table;
        if (nla_put_u32(skb, RTA_TABLE, table))
                goto nla_put_failure;
        /* Route type: reject, explicitly local, on a loopback device, or unicast. */
        if (rt->rt6i_flags & RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_flags & RTF_LOCAL)
                rtm->rtm_type = RTN_LOCAL;
        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* Start from the stored protocol, then override it from the route flags. */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags & RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags & RTF_ADDRCONF) {
                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
                        rtm->rtm_protocol = RTPROT_RA;
                else
                        rtm->rtm_protocol = RTPROT_KERNEL;
        }

        if (rt->rt6i_flags & RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        /* Destination: the caller's address as a /128, else the route's own prefix. */
        if (dst) {
                if (nla_put(skb, RTA_DST, 16, dst))
                        goto nla_put_failure;
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
                        goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
        /* Source: same scheme as the destination. */
        if (src) {
                if (nla_put(skb, RTA_SRC, 16, src))
                        goto nla_put_failure;
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len &&
                   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
                goto nla_put_failure;
#endif
        if (iif) {
#ifdef CONFIG_IPV6_MROUTE
                /*
                 * Multicast destination: the multicast routing code fills
                 * in the message.  Note the trailing "else" pairs with the
                 * nla_put_u32() after the #endif.
                 */
                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
                        int err = ip6mr_get_route(net, skb, rtm, nowait);
                        if (err <= 0) {
                                if (!nowait) {
                                        if (err == 0)
                                                return 0;
                                        goto nla_put_failure;
                                } else {
                                        if (err == -EMSGSIZE)
                                                goto nla_put_failure;
                                }
                        }
                } else
#endif
                        if (nla_put_u32(skb, RTA_IIF, iif))
                                goto nla_put_failure;
        } else if (dst) {
                /* No input interface: report the source address selected for @dst. */
                struct in6_addr saddr_buf;
                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
                    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
                        goto nla_put_failure;
        }

        /* The route's configured preferred source, if any. */
        if (rt->rt6i_prefsrc.plen) {
                struct in6_addr saddr_buf;
                saddr_buf = rt->rt6i_prefsrc.addr;
                if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
                        goto nla_put_failure;
        }

        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
                goto nla_put_failure;

        /* The gateway is reported from the route's neighbour entry, when it has one. */
        rcu_read_lock();
        n = rt->n;
        if (n) {
                if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
                        rcu_read_unlock();
                        goto nla_put_failure;
                }
        }
        rcu_read_unlock();

        if (rt->dst.dev &&
            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
                goto nla_put_failure;
        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
                goto nla_put_failure;

        /* Remaining lifetime in jiffies; 0 for routes without RTF_EXPIRES. */
        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

nla_put_failure:
        /* Drop the partially built message. */
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
2500
2501int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2502{
2503        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2504        int prefix;
2505
2506        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2507                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2508                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2509        } else
2510                prefix = 0;
2511
2512        return rt6_fill_node(arg->net,
2513                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2514                     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2515                     prefix, 0, NLM_F_MULTI);
2516}
2517
2518static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2519{
2520        struct net *net = sock_net(in_skb->sk);
2521        struct nlattr *tb[RTA_MAX+1];
2522        struct rt6_info *rt;
2523        struct sk_buff *skb;
2524        struct rtmsg *rtm;
2525        struct flowi6 fl6;
2526        int err, iif = 0, oif = 0;
2527
2528        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2529        if (err < 0)
2530                goto errout;
2531
2532        err = -EINVAL;
2533        memset(&fl6, 0, sizeof(fl6));
2534
2535        if (tb[RTA_SRC]) {
2536                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2537                        goto errout;
2538
2539                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2540        }
2541
2542        if (tb[RTA_DST]) {
2543                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2544                        goto errout;
2545
2546                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2547        }
2548
2549        if (tb[RTA_IIF])
2550                iif = nla_get_u32(tb[RTA_IIF]);
2551
2552        if (tb[RTA_OIF])
2553                oif = nla_get_u32(tb[RTA_OIF]);
2554
2555        if (iif) {
2556                struct net_device *dev;
2557                int flags = 0;
2558
2559                dev = __dev_get_by_index(net, iif);
2560                if (!dev) {
2561                        err = -ENODEV;
2562                        goto errout;
2563                }
2564
2565                fl6.flowi6_iif = iif;
2566
2567                if (!ipv6_addr_any(&fl6.saddr))
2568                        flags |= RT6_LOOKUP_F_HAS_SADDR;
2569
2570                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2571                                                               flags);
2572        } else {
2573                fl6.flowi6_oif = oif;
2574
2575                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2576        }
2577
2578        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2579        if (!skb) {
2580                dst_release(&rt->dst);
2581                err = -ENOBUFS;
2582                goto errout;
2583        }
2584
2585        /* Reserve room for dummy headers, this skb can pass
2586           through good chunk of routing engine.
2587         */
2588        skb_reset_mac_header(skb);
2589        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2590
2591        skb_dst_set(skb, &rt->dst);
2592
2593        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2594                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2595                            nlh->nlmsg_seq, 0, 0, 0);
2596        if (err < 0) {
2597                kfree_skb(skb);
2598                goto errout;
2599        }
2600
2601        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2602errout:
2603        return err;
2604}
2605
2606void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2607{
2608        struct sk_buff *skb;
2609        struct net *net = info->nl_net;
2610        u32 seq;
2611        int err;
2612
2613        err = -ENOBUFS;
2614        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2615
2616        skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2617        if (!skb)
2618                goto errout;
2619
2620        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2621                                event, info->pid, seq, 0, 0, 0);
2622        if (err < 0) {
2623                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2624                WARN_ON(err == -EMSGSIZE);
2625                kfree_skb(skb);
2626                goto errout;
2627        }
2628        rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2629                    info->nlh, gfp_any());
2630        return;
2631errout:
2632        if (err < 0)
2633                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2634}
2635
2636static int ip6_route_dev_notify(struct notifier_block *this,
2637                                unsigned long event, void *data)
2638{
2639        struct net_device *dev = (struct net_device *)data;
2640        struct net *net = dev_net(dev);
2641
2642        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2643                net->ipv6.ip6_null_entry->dst.dev = dev;
2644                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2645#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2646                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2647                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2648                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2649                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2650#endif
2651        }
2652
2653        return NOTIFY_OK;
2654}
2655
2656/*
2657 *      /proc
2658 */
2659
2660#ifdef CONFIG_PROC_FS
2661
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the nearby seq_file based code references this
 * structure -- it looks like a leftover; confirm before relying on it.
 */
struct rt6_proc_arg {
        char *buffer;
        int offset;
        int length;
        int skip;
        int len;
};
2670
2671static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2672{
2673        struct seq_file *m = p_arg;
2674        struct neighbour *n;
2675
2676        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2677
2678#ifdef CONFIG_IPV6_SUBTREES
2679        seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2680#else
2681        seq_puts(m, "00000000000000000000000000000000 00 ");
2682#endif
2683        rcu_read_lock();
2684        n = rt->n;
2685        if (n) {
2686                seq_printf(m, "%pi6", n->primary_key);
2687        } else {
2688                seq_puts(m, "00000000000000000000000000000000");
2689        }
2690        rcu_read_unlock();
2691        seq_printf(m, " %08x %08x %08x %08x %8s\n",
2692                   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2693                   rt->dst.__use, rt->rt6i_flags,
2694                   rt->dst.dev ? rt->dst.dev->name : "");
2695        return 0;
2696}
2697
2698static int ipv6_route_show(struct seq_file *m, void *v)
2699{
2700        struct net *net = (struct net *)m->private;
2701        fib6_clean_all_ro(net, rt6_info_route, 0, m);
2702        return 0;
2703}
2704
2705static int ipv6_route_open(struct inode *inode, struct file *file)
2706{
2707        return single_open_net(inode, file, ipv6_route_show);
2708}
2709
2710static const struct file_operations ipv6_route_proc_fops = {
2711        .owner          = THIS_MODULE,
2712        .open           = ipv6_route_open,
2713        .read           = seq_read,
2714        .llseek         = seq_lseek,
2715        .release        = single_release_net,
2716};
2717
2718static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2719{
2720        struct net *net = (struct net *)seq->private;
2721        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2722                   net->ipv6.rt6_stats->fib_nodes,
2723                   net->ipv6.rt6_stats->fib_route_nodes,
2724                   net->ipv6.rt6_stats->fib_rt_alloc,
2725                   net->ipv6.rt6_stats->fib_rt_entries,
2726                   net->ipv6.rt6_stats->fib_rt_cache,
2727                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2728                   net->ipv6.rt6_stats->fib_discarded_routes);
2729
2730        return 0;
2731}
2732
2733static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2734{
2735        return single_open_net(inode, file, rt6_stats_seq_show);
2736}
2737
2738static const struct file_operations rt6_stats_seq_fops = {
2739        .owner   = THIS_MODULE,
2740        .open    = rt6_stats_seq_open,
2741        .read    = seq_read,
2742        .llseek  = seq_lseek,
2743        .release = single_release_net,
2744};
2745#endif  /* CONFIG_PROC_FS */
2746
2747#ifdef CONFIG_SYSCTL
2748
2749static
2750int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2751                              void __user *buffer, size_t *lenp, loff_t *ppos)
2752{
2753        struct net *net;
2754        int delay;
2755        if (!write)
2756                return -EINVAL;
2757
2758        net = (struct net *)ctl->extra1;
2759        delay = net->ipv6.sysctl.flush_delay;
2760        proc_dointvec(ctl, write, buffer, lenp, ppos);
2761        fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2762        return 0;
2763}
2764
2765ctl_table ipv6_route_table_template[] = {
2766        {
2767                .procname       =       "flush",
2768                .data           =       &init_net.ipv6.sysctl.flush_delay,
2769                .maxlen         =       sizeof(int),
2770                .mode           =       0200,
2771                .proc_handler   =       ipv6_sysctl_rtcache_flush
2772        },
2773        {
2774                .procname       =       "gc_thresh",
2775                .data           =       &ip6_dst_ops_template.gc_thresh,
2776                .maxlen         =       sizeof(int),
2777                .mode           =       0644,
2778                .proc_handler   =       proc_dointvec,
2779        },
2780        {
2781                .procname       =       "max_size",
2782                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2783                .maxlen         =       sizeof(int),
2784                .mode           =       0644,
2785                .proc_handler   =       proc_dointvec,
2786        },
2787        {
2788                .procname       =       "gc_min_interval",
2789                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2790                .maxlen         =       sizeof(int),
2791                .mode           =       0644,
2792                .proc_handler   =       proc_dointvec_jiffies,
2793        },
2794        {
2795                .procname       =       "gc_timeout",
2796                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2797                .maxlen         =       sizeof(int),
2798                .mode           =       0644,
2799                .proc_handler   =       proc_dointvec_jiffies,
2800        },
2801        {
2802                .procname       =       "gc_interval",
2803                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2804                .maxlen         =       sizeof(int),
2805                .mode           =       0644,
2806                .proc_handler   =       proc_dointvec_jiffies,
2807        },
2808        {
2809                .procname       =       "gc_elasticity",
2810                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2811                .maxlen         =       sizeof(int),
2812                .mode           =       0644,
2813                .proc_handler   =       proc_dointvec,
2814        },
2815        {
2816                .procname       =       "mtu_expires",
2817                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2818                .maxlen         =       sizeof(int),
2819                .mode           =       0644,
2820                .proc_handler   =       proc_dointvec_jiffies,
2821        },
2822        {
2823                .procname       =       "min_adv_mss",
2824                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2825                .maxlen         =       sizeof(int),
2826                .mode           =       0644,
2827                .proc_handler   =       proc_dointvec,
2828        },
2829        {
2830                .procname       =       "gc_min_interval_ms",
2831                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2832                .maxlen         =       sizeof(int),
2833                .mode           =       0644,
2834                .proc_handler   =       proc_dointvec_ms_jiffies,
2835        },
2836        { }
2837};
2838
2839struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2840{
2841        struct ctl_table *table;
2842
2843        table = kmemdup(ipv6_route_table_template,
2844                        sizeof(ipv6_route_table_template),
2845                        GFP_KERNEL);
2846
2847        if (table) {
2848                table[0].data = &net->ipv6.sysctl.flush_delay;
2849                table[0].extra1 = net;
2850                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2851                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2852                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2853                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2854                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2855                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2856                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2857                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2858                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2859        }
2860
2861        return table;
2862}
2863#endif
2864
2865static int __net_init ip6_route_net_init(struct net *net)
2866{
2867        int ret = -ENOMEM;
2868
2869        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2870               sizeof(net->ipv6.ip6_dst_ops));
2871
2872        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2873                goto out_ip6_dst_ops;
2874
2875        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2876                                           sizeof(*net->ipv6.ip6_null_entry),
2877                                           GFP_KERNEL);
2878        if (!net->ipv6.ip6_null_entry)
2879                goto out_ip6_dst_entries;
2880        net->ipv6.ip6_null_entry->dst.path =
2881                (struct dst_entry *)net->ipv6.ip6_null_entry;
2882        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2883        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2884                         ip6_template_metrics, true);
2885
2886#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2887        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2888                                               sizeof(*net->ipv6.ip6_prohibit_entry),
2889                                               GFP_KERNEL);
2890        if (!net->ipv6.ip6_prohibit_entry)
2891                goto out_ip6_null_entry;
2892        net->ipv6.ip6_prohibit_entry->dst.path =
2893                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2894        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2895        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2896                         ip6_template_metrics, true);
2897
2898        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2899                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
2900                                               GFP_KERNEL);
2901        if (!net->ipv6.ip6_blk_hole_entry)
2902                goto out_ip6_prohibit_entry;
2903        net->ipv6.ip6_blk_hole_entry->dst.path =
2904                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2905        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2906        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2907                         ip6_template_metrics, true);
2908#endif
2909
2910        net->ipv6.sysctl.flush_delay = 0;
2911        net->ipv6.sysctl.ip6_rt_max_size = 4096;
2912        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2913        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2914        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2915        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2916        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2917        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2918
2919        net->ipv6.ip6_rt_gc_expire = 30*HZ;
2920
2921        ret = 0;
2922out:
2923        return ret;
2924
2925#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2926out_ip6_prohibit_entry:
2927        kfree(net->ipv6.ip6_prohibit_entry);
2928out_ip6_null_entry:
2929        kfree(net->ipv6.ip6_null_entry);
2930#endif
2931out_ip6_dst_entries:
2932        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2933out_ip6_dst_ops:
2934        goto out;
2935}
2936
2937static void __net_exit ip6_route_net_exit(struct net *net)
2938{
2939        kfree(net->ipv6.ip6_null_entry);
2940#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2941        kfree(net->ipv6.ip6_prohibit_entry);
2942        kfree(net->ipv6.ip6_blk_hole_entry);
2943#endif
2944        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2945}
2946
2947static int __net_init ip6_route_net_init_late(struct net *net)
2948{
2949#ifdef CONFIG_PROC_FS
2950        proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2951        proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2952#endif
2953        return 0;
2954}
2955
2956static void __net_exit ip6_route_net_exit_late(struct net *net)
2957{
2958#ifdef CONFIG_PROC_FS
2959        proc_net_remove(net, "ipv6_route");
2960        proc_net_remove(net, "rt6_stats");
2961#endif
2962}
2963
2964static struct pernet_operations ip6_route_net_ops = {
2965        .init = ip6_route_net_init,
2966        .exit = ip6_route_net_exit,
2967};
2968
2969static int __net_init ipv6_inetpeer_init(struct net *net)
2970{
2971        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2972
2973        if (!bp)
2974                return -ENOMEM;
2975        inet_peer_base_init(bp);
2976        net->ipv6.peers = bp;
2977        return 0;
2978}
2979
2980static void __net_exit ipv6_inetpeer_exit(struct net *net)
2981{
2982        struct inet_peer_base *bp = net->ipv6.peers;
2983
2984        net->ipv6.peers = NULL;
2985        inetpeer_invalidate_tree(bp);
2986        kfree(bp);
2987}
2988
2989static struct pernet_operations ipv6_inetpeer_ops = {
2990        .init   =       ipv6_inetpeer_init,
2991        .exit   =       ipv6_inetpeer_exit,
2992};
2993
2994static struct pernet_operations ip6_route_net_late_ops = {
2995        .init = ip6_route_net_init_late,
2996        .exit = ip6_route_net_exit_late,
2997};
2998
2999static struct notifier_block ip6_route_dev_notifier = {
3000        .notifier_call = ip6_route_dev_notify,
3001        .priority = 0,
3002};
3003
3004int __init ip6_route_init(void)
3005{
3006        int ret;
3007
3008        ret = -ENOMEM;
3009        ip6_dst_ops_template.kmem_cachep =
3010                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3011                                  SLAB_HWCACHE_ALIGN, NULL);
3012        if (!ip6_dst_ops_template.kmem_cachep)
3013                goto out;
3014
3015        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3016        if (ret)
3017                goto out_kmem_cache;
3018
3019        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3020        if (ret)
3021                goto out_dst_entries;
3022
3023        ret = register_pernet_subsys(&ip6_route_net_ops);
3024        if (ret)
3025                goto out_register_inetpeer;
3026
3027        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3028
3029        /* Registering of the loopback is done before this portion of code,
3030         * the loopback reference in rt6_info will not be taken, do it
3031         * manually for init_net */
3032        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3033        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3034  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3035        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3036        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3037        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3038        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3039  #endif
3040        ret = fib6_init();
3041        if (ret)
3042                goto out_register_subsys;
3043
3044        ret = xfrm6_init();
3045        if (ret)
3046                goto out_fib6_init;
3047
3048        ret = fib6_rules_init();
3049        if (ret)
3050                goto xfrm6_init;
3051
3052        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3053        if (ret)
3054                goto fib6_rules_init;
3055
3056        ret = -ENOBUFS;
3057        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3058            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3059            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3060                goto out_register_late_subsys;
3061
3062        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3063        if (ret)
3064                goto out_register_late_subsys;
3065
3066out:
3067        return ret;
3068
3069out_register_late_subsys:
3070        unregister_pernet_subsys(&ip6_route_net_late_ops);
3071fib6_rules_init:
3072        fib6_rules_cleanup();
3073xfrm6_init:
3074        xfrm6_fini();
3075out_fib6_init:
3076        fib6_gc_cleanup();
3077out_register_subsys:
3078        unregister_pernet_subsys(&ip6_route_net_ops);
3079out_register_inetpeer:
3080        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3081out_dst_entries:
3082        dst_entries_destroy(&ip6_dst_blackhole_ops);
3083out_kmem_cache:
3084        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3085        goto out;
3086}
3087
3088void ip6_route_cleanup(void)
3089{
3090        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3091        unregister_pernet_subsys(&ip6_route_net_late_ops);
3092        fib6_rules_cleanup();
3093        xfrm6_fini();
3094        fib6_gc_cleanup();
3095        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3096        unregister_pernet_subsys(&ip6_route_net_ops);
3097        dst_entries_destroy(&ip6_dst_blackhole_ops);
3098        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3099}
3100
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.