linux/net/ipv6/route.c
<<
>>
Prefs
   1/*
   2 *      Linux INET6 implementation
   3 *      FIB front-end.
   4 *
   5 *      Authors:
   6 *      Pedro Roque             <roque@di.fc.ul.pt>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 */
  13
  14/*      Changes:
  15 *
  16 *      YOSHIFUJI Hideaki @USAGI
  17 *              reworked default router selection.
  18 *              - respect outgoing interface
  19 *              - select from (probably) reachable routers (i.e.
  20 *              routers in REACHABLE, STALE, DELAY or PROBE states).
  21 *              - always select the same router if it is (probably)
  22 *              reachable.  otherwise, round-robin the list.
  23 *      Ville Nuorvala
  24 *              Fixed routing subtrees.
  25 */
  26
  27#define pr_fmt(fmt) "IPv6: " fmt
  28
  29#include <linux/capability.h>
  30#include <linux/errno.h>
  31#include <linux/export.h>
  32#include <linux/types.h>
  33#include <linux/times.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/net.h>
  37#include <linux/route.h>
  38#include <linux/netdevice.h>
  39#include <linux/in6.h>
  40#include <linux/mroute6.h>
  41#include <linux/init.h>
  42#include <linux/if_arp.h>
  43#include <linux/proc_fs.h>
  44#include <linux/seq_file.h>
  45#include <linux/nsproxy.h>
  46#include <linux/slab.h>
  47#include <net/net_namespace.h>
  48#include <net/snmp.h>
  49#include <net/ipv6.h>
  50#include <net/ip6_fib.h>
  51#include <net/ip6_route.h>
  52#include <net/ndisc.h>
  53#include <net/addrconf.h>
  54#include <net/tcp.h>
  55#include <linux/rtnetlink.h>
  56#include <net/dst.h>
  57#include <net/xfrm.h>
  58#include <net/netevent.h>
  59#include <net/netlink.h>
  60
  61#include <asm/uaccess.h>
  62
  63#ifdef CONFIG_SYSCTL
  64#include <linux/sysctl.h>
  65#endif
  66
  67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
  68                                    const struct in6_addr *dest);
  69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
  70static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
  71static unsigned int      ip6_mtu(const struct dst_entry *dst);
  72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
  73static void             ip6_dst_destroy(struct dst_entry *);
  74static void             ip6_dst_ifdown(struct dst_entry *,
  75                                       struct net_device *dev, int how);
  76static int               ip6_dst_gc(struct dst_ops *ops);
  77
  78static int              ip6_pkt_discard(struct sk_buff *skb);
  79static int              ip6_pkt_discard_out(struct sk_buff *skb);
  80static void             ip6_link_failure(struct sk_buff *skb);
  81static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
  82                                           struct sk_buff *skb, u32 mtu);
  83static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  84                                        struct sk_buff *skb);
  85
  86#ifdef CONFIG_IPV6_ROUTE_INFO
  87static struct rt6_info *rt6_add_route_info(struct net *net,
  88                                           const struct in6_addr *prefix, int prefixlen,
  89                                           const struct in6_addr *gwaddr, int ifindex,
  90                                           unsigned int pref);
  91static struct rt6_info *rt6_get_route_info(struct net *net,
  92                                           const struct in6_addr *prefix, int prefixlen,
  93                                           const struct in6_addr *gwaddr, int ifindex);
  94#endif
  95
  96static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
  97{
  98        struct rt6_info *rt = (struct rt6_info *) dst;
  99        struct inet_peer *peer;
 100        u32 *p = NULL;
 101
 102        if (!(rt->dst.flags & DST_HOST))
 103                return NULL;
 104
 105        peer = rt6_get_peer_create(rt);
 106        if (peer) {
 107                u32 *old_p = __DST_METRICS_PTR(old);
 108                unsigned long prev, new;
 109
 110                p = peer->metrics;
 111                if (inet_metrics_new(peer))
 112                        memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
 113
 114                new = (unsigned long) p;
 115                prev = cmpxchg(&dst->_metrics, old, new);
 116
 117                if (prev != old) {
 118                        p = __DST_METRICS_PTR(prev);
 119                        if (prev & DST_METRICS_READ_ONLY)
 120                                p = NULL;
 121                }
 122        }
 123        return p;
 124}
 125
 126static inline const void *choose_neigh_daddr(struct rt6_info *rt,
 127                                             struct sk_buff *skb,
 128                                             const void *daddr)
 129{
 130        struct in6_addr *p = &rt->rt6i_gateway;
 131
 132        if (!ipv6_addr_any(p))
 133                return (const void *) p;
 134        else if (skb)
 135                return &ipv6_hdr(skb)->daddr;
 136        return daddr;
 137}
 138
 139static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 140                                          struct sk_buff *skb,
 141                                          const void *daddr)
 142{
 143        struct rt6_info *rt = (struct rt6_info *) dst;
 144        struct neighbour *n;
 145
 146        daddr = choose_neigh_daddr(rt, skb, daddr);
 147        n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
 148        if (n)
 149                return n;
 150        return neigh_create(&nd_tbl, daddr, dst->dev);
 151}
 152
 153static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
 154{
 155        struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
 156        if (!n) {
 157                n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
 158                if (IS_ERR(n))
 159                        return PTR_ERR(n);
 160        }
 161        rt->n = n;
 162
 163        return 0;
 164}
 165
 166static struct dst_ops ip6_dst_ops_template = {
 167        .family                 =       AF_INET6,
 168        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 169        .gc                     =       ip6_dst_gc,
 170        .gc_thresh              =       1024,
 171        .check                  =       ip6_dst_check,
 172        .default_advmss         =       ip6_default_advmss,
 173        .mtu                    =       ip6_mtu,
 174        .cow_metrics            =       ipv6_cow_metrics,
 175        .destroy                =       ip6_dst_destroy,
 176        .ifdown                 =       ip6_dst_ifdown,
 177        .negative_advice        =       ip6_negative_advice,
 178        .link_failure           =       ip6_link_failure,
 179        .update_pmtu            =       ip6_rt_update_pmtu,
 180        .redirect               =       rt6_do_redirect,
 181        .local_out              =       __ip6_local_out,
 182        .neigh_lookup           =       ip6_neigh_lookup,
 183};
 184
 185static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 186{
 187        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 188
 189        return mtu ? : dst->dev->mtu;
 190}
 191
 192static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
 193                                         struct sk_buff *skb, u32 mtu)
 194{
 195}
 196
 197static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
 198                                      struct sk_buff *skb)
 199{
 200}
 201
 202static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
 203                                         unsigned long old)
 204{
 205        return NULL;
 206}
 207
 208static struct dst_ops ip6_dst_blackhole_ops = {
 209        .family                 =       AF_INET6,
 210        .protocol               =       cpu_to_be16(ETH_P_IPV6),
 211        .destroy                =       ip6_dst_destroy,
 212        .check                  =       ip6_dst_check,
 213        .mtu                    =       ip6_blackhole_mtu,
 214        .default_advmss         =       ip6_default_advmss,
 215        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 216        .redirect               =       ip6_rt_blackhole_redirect,
 217        .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
 218        .neigh_lookup           =       ip6_neigh_lookup,
 219};
 220
 221static const u32 ip6_template_metrics[RTAX_MAX] = {
 222        [RTAX_HOPLIMIT - 1] = 0,
 223};
 224
 225static struct rt6_info ip6_null_entry_template = {
 226        .dst = {
 227                .__refcnt       = ATOMIC_INIT(1),
 228                .__use          = 1,
 229                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 230                .error          = -ENETUNREACH,
 231                .input          = ip6_pkt_discard,
 232                .output         = ip6_pkt_discard_out,
 233        },
 234        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 235        .rt6i_protocol  = RTPROT_KERNEL,
 236        .rt6i_metric    = ~(u32) 0,
 237        .rt6i_ref       = ATOMIC_INIT(1),
 238};
 239
 240#ifdef CONFIG_IPV6_MULTIPLE_TABLES
 241
 242static int ip6_pkt_prohibit(struct sk_buff *skb);
 243static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 244
 245static struct rt6_info ip6_prohibit_entry_template = {
 246        .dst = {
 247                .__refcnt       = ATOMIC_INIT(1),
 248                .__use          = 1,
 249                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 250                .error          = -EACCES,
 251                .input          = ip6_pkt_prohibit,
 252                .output         = ip6_pkt_prohibit_out,
 253        },
 254        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 255        .rt6i_protocol  = RTPROT_KERNEL,
 256        .rt6i_metric    = ~(u32) 0,
 257        .rt6i_ref       = ATOMIC_INIT(1),
 258};
 259
 260static struct rt6_info ip6_blk_hole_entry_template = {
 261        .dst = {
 262                .__refcnt       = ATOMIC_INIT(1),
 263                .__use          = 1,
 264                .obsolete       = DST_OBSOLETE_FORCE_CHK,
 265                .error          = -EINVAL,
 266                .input          = dst_discard,
 267                .output         = dst_discard,
 268        },
 269        .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 270        .rt6i_protocol  = RTPROT_KERNEL,
 271        .rt6i_metric    = ~(u32) 0,
 272        .rt6i_ref       = ATOMIC_INIT(1),
 273};
 274
 275#endif
 276
 277/* allocate dst with ip6_dst_ops */
 278static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 279                                             struct net_device *dev,
 280                                             int flags,
 281                                             struct fib6_table *table)
 282{
 283        struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 284                                        0, DST_OBSOLETE_FORCE_CHK, flags);
 285
 286        if (rt) {
 287                struct dst_entry *dst = &rt->dst;
 288
 289                memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 290                rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
 291                rt->rt6i_genid = rt_genid(net);
 292        }
 293        return rt;
 294}
 295
 296static void ip6_dst_destroy(struct dst_entry *dst)
 297{
 298        struct rt6_info *rt = (struct rt6_info *)dst;
 299        struct inet6_dev *idev = rt->rt6i_idev;
 300
 301        if (rt->n)
 302                neigh_release(rt->n);
 303
 304        if (!(rt->dst.flags & DST_HOST))
 305                dst_destroy_metrics_generic(dst);
 306
 307        if (idev) {
 308                rt->rt6i_idev = NULL;
 309                in6_dev_put(idev);
 310        }
 311
 312        if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
 313                dst_release(dst->from);
 314
 315        if (rt6_has_peer(rt)) {
 316                struct inet_peer *peer = rt6_peer_ptr(rt);
 317                inet_putpeer(peer);
 318        }
 319}
 320
 321static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
 322
 323static u32 rt6_peer_genid(void)
 324{
 325        return atomic_read(&__rt6_peer_genid);
 326}
 327
 328void rt6_bind_peer(struct rt6_info *rt, int create)
 329{
 330        struct inet_peer_base *base;
 331        struct inet_peer *peer;
 332
 333        base = inetpeer_base_ptr(rt->_rt6i_peer);
 334        if (!base)
 335                return;
 336
 337        peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
 338        if (peer) {
 339                if (!rt6_set_peer(rt, peer))
 340                        inet_putpeer(peer);
 341                else
 342                        rt->rt6i_peer_genid = rt6_peer_genid();
 343        }
 344}
 345
 346static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 347                           int how)
 348{
 349        struct rt6_info *rt = (struct rt6_info *)dst;
 350        struct inet6_dev *idev = rt->rt6i_idev;
 351        struct net_device *loopback_dev =
 352                dev_net(dev)->loopback_dev;
 353
 354        if (dev != loopback_dev) {
 355                if (idev && idev->dev == dev) {
 356                        struct inet6_dev *loopback_idev =
 357                                in6_dev_get(loopback_dev);
 358                        if (loopback_idev) {
 359                                rt->rt6i_idev = loopback_idev;
 360                                in6_dev_put(idev);
 361                        }
 362                }
 363                if (rt->n && rt->n->dev == dev) {
 364                        rt->n->dev = loopback_dev;
 365                        dev_hold(loopback_dev);
 366                        dev_put(dev);
 367                }
 368        }
 369}
 370
 371static bool rt6_check_expired(const struct rt6_info *rt)
 372{
 373        struct rt6_info *ort = NULL;
 374
 375        if (rt->rt6i_flags & RTF_EXPIRES) {
 376                if (time_after(jiffies, rt->dst.expires))
 377                        return true;
 378        } else if (rt->dst.from) {
 379                ort = (struct rt6_info *) rt->dst.from;
 380                return (ort->rt6i_flags & RTF_EXPIRES) &&
 381                        time_after(jiffies, ort->dst.expires);
 382        }
 383        return false;
 384}
 385
 386static bool rt6_need_strict(const struct in6_addr *daddr)
 387{
 388        return ipv6_addr_type(daddr) &
 389                (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 390}
 391
 392/*
 393 *      Route lookup. Any table->tb6_lock is implied.
 394 */
 395
 396static inline struct rt6_info *rt6_device_match(struct net *net,
 397                                                    struct rt6_info *rt,
 398                                                    const struct in6_addr *saddr,
 399                                                    int oif,
 400                                                    int flags)
 401{
 402        struct rt6_info *local = NULL;
 403        struct rt6_info *sprt;
 404
 405        if (!oif && ipv6_addr_any(saddr))
 406                goto out;
 407
 408        for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 409                struct net_device *dev = sprt->dst.dev;
 410
 411                if (oif) {
 412                        if (dev->ifindex == oif)
 413                                return sprt;
 414                        if (dev->flags & IFF_LOOPBACK) {
 415                                if (!sprt->rt6i_idev ||
 416                                    sprt->rt6i_idev->dev->ifindex != oif) {
 417                                        if (flags & RT6_LOOKUP_F_IFACE && oif)
 418                                                continue;
 419                                        if (local && (!oif ||
 420                                                      local->rt6i_idev->dev->ifindex == oif))
 421                                                continue;
 422                                }
 423                                local = sprt;
 424                        }
 425                } else {
 426                        if (ipv6_chk_addr(net, saddr, dev,
 427                                          flags & RT6_LOOKUP_F_IFACE))
 428                                return sprt;
 429                }
 430        }
 431
 432        if (oif) {
 433                if (local)
 434                        return local;
 435
 436                if (flags & RT6_LOOKUP_F_IFACE)
 437                        return net->ipv6.ip6_null_entry;
 438        }
 439out:
 440        return rt;
 441}
 442
 443#ifdef CONFIG_IPV6_ROUTER_PREF
 444static void rt6_probe(struct rt6_info *rt)
 445{
 446        struct neighbour *neigh;
 447        /*
 448         * Okay, this does not seem to be appropriate
 449         * for now, however, we need to check if it
 450         * is really so; aka Router Reachability Probing.
 451         *
 452         * Router Reachability Probe MUST be rate-limited
 453         * to no more than one per minute.
 454         */
 455        rcu_read_lock();
 456        neigh = rt ? rt->n : NULL;
 457        if (!neigh || (neigh->nud_state & NUD_VALID))
 458                goto out;
 459        read_lock_bh(&neigh->lock);
 460        if (!(neigh->nud_state & NUD_VALID) &&
 461            time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 462                struct in6_addr mcaddr;
 463                struct in6_addr *target;
 464
 465                neigh->updated = jiffies;
 466                read_unlock_bh(&neigh->lock);
 467
 468                target = (struct in6_addr *)&neigh->primary_key;
 469                addrconf_addr_solict_mult(target, &mcaddr);
 470                ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
 471        } else {
 472                read_unlock_bh(&neigh->lock);
 473        }
 474out:
 475        rcu_read_unlock();
 476}
 477#else
 478static inline void rt6_probe(struct rt6_info *rt)
 479{
 480}
 481#endif
 482
 483/*
 484 * Default Router Selection (RFC 2461 6.3.6)
 485 */
 486static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 487{
 488        struct net_device *dev = rt->dst.dev;
 489        if (!oif || dev->ifindex == oif)
 490                return 2;
 491        if ((dev->flags & IFF_LOOPBACK) &&
 492            rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 493                return 1;
 494        return 0;
 495}
 496
 497static inline int rt6_check_neigh(struct rt6_info *rt)
 498{
 499        struct neighbour *neigh;
 500        int m;
 501
 502        rcu_read_lock();
 503        neigh = rt->n;
 504        if (rt->rt6i_flags & RTF_NONEXTHOP ||
 505            !(rt->rt6i_flags & RTF_GATEWAY))
 506                m = 1;
 507        else if (neigh) {
 508                read_lock_bh(&neigh->lock);
 509                if (neigh->nud_state & NUD_VALID)
 510                        m = 2;
 511#ifdef CONFIG_IPV6_ROUTER_PREF
 512                else if (neigh->nud_state & NUD_FAILED)
 513                        m = 0;
 514#endif
 515                else
 516                        m = 1;
 517                read_unlock_bh(&neigh->lock);
 518        } else
 519                m = 0;
 520        rcu_read_unlock();
 521        return m;
 522}
 523
 524static int rt6_score_route(struct rt6_info *rt, int oif,
 525                           int strict)
 526{
 527        int m, n;
 528
 529        m = rt6_check_dev(rt, oif);
 530        if (!m && (strict & RT6_LOOKUP_F_IFACE))
 531                return -1;
 532#ifdef CONFIG_IPV6_ROUTER_PREF
 533        m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 534#endif
 535        n = rt6_check_neigh(rt);
 536        if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 537                return -1;
 538        return m;
 539}
 540
 541static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 542                                   int *mpri, struct rt6_info *match)
 543{
 544        int m;
 545
 546        if (rt6_check_expired(rt))
 547                goto out;
 548
 549        m = rt6_score_route(rt, oif, strict);
 550        if (m < 0)
 551                goto out;
 552
 553        if (m > *mpri) {
 554                if (strict & RT6_LOOKUP_F_REACHABLE)
 555                        rt6_probe(match);
 556                *mpri = m;
 557                match = rt;
 558        } else if (strict & RT6_LOOKUP_F_REACHABLE) {
 559                rt6_probe(rt);
 560        }
 561
 562out:
 563        return match;
 564}
 565
 566static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 567                                     struct rt6_info *rr_head,
 568                                     u32 metric, int oif, int strict)
 569{
 570        struct rt6_info *rt, *match;
 571        int mpri = -1;
 572
 573        match = NULL;
 574        for (rt = rr_head; rt && rt->rt6i_metric == metric;
 575             rt = rt->dst.rt6_next)
 576                match = find_match(rt, oif, strict, &mpri, match);
 577        for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 578             rt = rt->dst.rt6_next)
 579                match = find_match(rt, oif, strict, &mpri, match);
 580
 581        return match;
 582}
 583
 584static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 585{
 586        struct rt6_info *match, *rt0;
 587        struct net *net;
 588
 589        rt0 = fn->rr_ptr;
 590        if (!rt0)
 591                fn->rr_ptr = rt0 = fn->leaf;
 592
 593        match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 594
 595        if (!match &&
 596            (strict & RT6_LOOKUP_F_REACHABLE)) {
 597                struct rt6_info *next = rt0->dst.rt6_next;
 598
 599                /* no entries matched; do round-robin */
 600                if (!next || next->rt6i_metric != rt0->rt6i_metric)
 601                        next = fn->leaf;
 602
 603                if (next != rt0)
 604                        fn->rr_ptr = next;
 605        }
 606
 607        net = dev_net(rt0->dst.dev);
 608        return match ? match : net->ipv6.ip6_null_entry;
 609}
 610
 611#ifdef CONFIG_IPV6_ROUTE_INFO
 612int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 613                  const struct in6_addr *gwaddr)
 614{
 615        struct net *net = dev_net(dev);
 616        struct route_info *rinfo = (struct route_info *) opt;
 617        struct in6_addr prefix_buf, *prefix;
 618        unsigned int pref;
 619        unsigned long lifetime;
 620        struct rt6_info *rt;
 621
 622        if (len < sizeof(struct route_info)) {
 623                return -EINVAL;
 624        }
 625
 626        /* Sanity check for prefix_len and length */
 627        if (rinfo->length > 3) {
 628                return -EINVAL;
 629        } else if (rinfo->prefix_len > 128) {
 630                return -EINVAL;
 631        } else if (rinfo->prefix_len > 64) {
 632                if (rinfo->length < 2) {
 633                        return -EINVAL;
 634                }
 635        } else if (rinfo->prefix_len > 0) {
 636                if (rinfo->length < 1) {
 637                        return -EINVAL;
 638                }
 639        }
 640
 641        pref = rinfo->route_pref;
 642        if (pref == ICMPV6_ROUTER_PREF_INVALID)
 643                return -EINVAL;
 644
 645        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 646
 647        if (rinfo->length == 3)
 648                prefix = (struct in6_addr *)rinfo->prefix;
 649        else {
 650                /* this function is safe */
 651                ipv6_addr_prefix(&prefix_buf,
 652                                 (struct in6_addr *)rinfo->prefix,
 653                                 rinfo->prefix_len);
 654                prefix = &prefix_buf;
 655        }
 656
 657        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
 658                                dev->ifindex);
 659
 660        if (rt && !lifetime) {
 661                ip6_del_rt(rt);
 662                rt = NULL;
 663        }
 664
 665        if (!rt && lifetime)
 666                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 667                                        pref);
 668        else if (rt)
 669                rt->rt6i_flags = RTF_ROUTEINFO |
 670                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 671
 672        if (rt) {
 673                if (!addrconf_finite_timeout(lifetime))
 674                        rt6_clean_expires(rt);
 675                else
 676                        rt6_set_expires(rt, jiffies + HZ * lifetime);
 677
 678                dst_release(&rt->dst);
 679        }
 680        return 0;
 681}
 682#endif
 683
 684#define BACKTRACK(__net, saddr)                 \
 685do { \
 686        if (rt == __net->ipv6.ip6_null_entry) { \
 687                struct fib6_node *pn; \
 688                while (1) { \
 689                        if (fn->fn_flags & RTN_TL_ROOT) \
 690                                goto out; \
 691                        pn = fn->parent; \
 692                        if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 693                                fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 694                        else \
 695                                fn = pn; \
 696                        if (fn->fn_flags & RTN_RTINFO) \
 697                                goto restart; \
 698                } \
 699        } \
 700} while (0)
 701
 702static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 703                                             struct fib6_table *table,
 704                                             struct flowi6 *fl6, int flags)
 705{
 706        struct fib6_node *fn;
 707        struct rt6_info *rt;
 708
 709        read_lock_bh(&table->tb6_lock);
 710        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 711restart:
 712        rt = fn->leaf;
 713        rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
 714        BACKTRACK(net, &fl6->saddr);
 715out:
 716        dst_use(&rt->dst, jiffies);
 717        read_unlock_bh(&table->tb6_lock);
 718        return rt;
 719
 720}
 721
 722struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
 723                                    int flags)
 724{
 725        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
 726}
 727EXPORT_SYMBOL_GPL(ip6_route_lookup);
 728
 729struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 730                            const struct in6_addr *saddr, int oif, int strict)
 731{
 732        struct flowi6 fl6 = {
 733                .flowi6_oif = oif,
 734                .daddr = *daddr,
 735        };
 736        struct dst_entry *dst;
 737        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 738
 739        if (saddr) {
 740                memcpy(&fl6.saddr, saddr, sizeof(*saddr));
 741                flags |= RT6_LOOKUP_F_HAS_SADDR;
 742        }
 743
 744        dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
 745        if (dst->error == 0)
 746                return (struct rt6_info *) dst;
 747
 748        dst_release(dst);
 749
 750        return NULL;
 751}
 752
 753EXPORT_SYMBOL(rt6_lookup);
 754
 755/* ip6_ins_rt is called with table->tb6_lock NOT held.
 756   It takes a new route entry; if the addition fails for any reason, the
 757   route is freed. In any case, if the caller does not hold a reference
 758   to it, the route may be destroyed.
 759 */
 760
 761static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 762{
 763        int err;
 764        struct fib6_table *table;
 765
 766        table = rt->rt6i_table;
 767        write_lock_bh(&table->tb6_lock);
 768        err = fib6_add(&table->tb6_root, rt, info);
 769        write_unlock_bh(&table->tb6_lock);
 770
 771        return err;
 772}
 773
 774int ip6_ins_rt(struct rt6_info *rt)
 775{
 776        struct nl_info info = {
 777                .nl_net = dev_net(rt->dst.dev),
 778        };
 779        return __ip6_ins_rt(rt, &info);
 780}
 781
 782static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
 783                                      const struct in6_addr *daddr,
 784                                      const struct in6_addr *saddr)
 785{
 786        struct rt6_info *rt;
 787
 788        /*
 789         *      Clone the route.
 790         */
 791
 792        rt = ip6_rt_copy(ort, daddr);
 793
 794        if (rt) {
 795                int attempts = !in_softirq();
 796
 797                if (!(rt->rt6i_flags & RTF_GATEWAY)) {
 798                        if (ort->rt6i_dst.plen != 128 &&
 799                            ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 800                                rt->rt6i_flags |= RTF_ANYCAST;
 801                        rt->rt6i_gateway = *daddr;
 802                }
 803
 804                rt->rt6i_flags |= RTF_CACHE;
 805
 806#ifdef CONFIG_IPV6_SUBTREES
 807                if (rt->rt6i_src.plen && saddr) {
 808                        rt->rt6i_src.addr = *saddr;
 809                        rt->rt6i_src.plen = 128;
 810                }
 811#endif
 812
 813        retry:
 814                if (rt6_bind_neighbour(rt, rt->dst.dev)) {
 815                        struct net *net = dev_net(rt->dst.dev);
 816                        int saved_rt_min_interval =
 817                                net->ipv6.sysctl.ip6_rt_gc_min_interval;
 818                        int saved_rt_elasticity =
 819                                net->ipv6.sysctl.ip6_rt_gc_elasticity;
 820
 821                        if (attempts-- > 0) {
 822                                net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
 823                                net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
 824
 825                                ip6_dst_gc(&net->ipv6.ip6_dst_ops);
 826
 827                                net->ipv6.sysctl.ip6_rt_gc_elasticity =
 828                                        saved_rt_elasticity;
 829                                net->ipv6.sysctl.ip6_rt_gc_min_interval =
 830                                        saved_rt_min_interval;
 831                                goto retry;
 832                        }
 833
 834                        net_warn_ratelimited("Neighbour table overflow\n");
 835                        dst_free(&rt->dst);
 836                        return NULL;
 837                }
 838        }
 839
 840        return rt;
 841}
 842
 843static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
 844                                        const struct in6_addr *daddr)
 845{
 846        struct rt6_info *rt = ip6_rt_copy(ort, daddr);
 847
 848        if (rt) {
 849                rt->rt6i_flags |= RTF_CACHE;
 850                rt->n = neigh_clone(ort->n);
 851        }
 852        return rt;
 853}
 854
    /*
     * ip6_pol_route - look up a route in @table and, where needed, create
     * and insert a cached copy of it.
     *
     * The node for fl6->daddr / fl6->saddr is located and rt6_select() picks
     * a route from it; BACKTRACK() retries on ancestor nodes while the result
     * is the null entry.  A result that is the null entry or already carries
     * RTF_CACHE is returned as is.  Otherwise the table lock is dropped and a
     * copy is made (rt6_alloc_cow() or rt6_alloc_clone()) and inserted with
     * ip6_ins_rt(); host routes that need no copy are returned directly.
     *
     * On every path the returned route has had dst_hold() called on it and
     * its lastuse / __use fields updated.
     */
 855static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 856                                      struct flowi6 *fl6, int flags)
 857{
 858        struct fib6_node *fn;
 859        struct rt6_info *rt, *nrt;
 860        int strict = 0;
            /* number of copy/insert rounds before giving up */
 861        int attempts = 3;
 862        int err;
            /* reachability is only insisted on when forwarding is disabled */
 863        int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 864
 865        strict |= flags & RT6_LOOKUP_F_IFACE;
 866
 867relookup:
 868        read_lock_bh(&table->tb6_lock);
 869
            /* re-entered from "out" with the table lock still held */
 870restart_2:
 871        fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 872
            /* target of BACKTRACK() once it found an ancestor with routes */
 873restart:
 874        rt = rt6_select(fn, oif, strict | reachable);
 875
 876        BACKTRACK(net, &fl6->saddr);
 877        if (rt == net->ipv6.ip6_null_entry ||
 878            rt->rt6i_flags & RTF_CACHE)
 879                goto out;
 880
            /* keep rt alive while the table lock is not held */
 881        dst_hold(&rt->dst);
 882        read_unlock_bh(&table->tb6_lock);
 883
            /*
             * No neighbour bound (and a next hop is expected): copy with a
             * fresh neighbour binding.  Other non-host routes: plain clone.
             * Host routes are returned without copying.
             */
 884        if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
 885                nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
 886        else if (!(rt->dst.flags & DST_HOST))
 887                nrt = rt6_alloc_clone(rt, &fl6->daddr);
 888        else
 889                goto out2;
 890
 891        dst_release(&rt->dst);
            /* fall back to the null entry if the copy could not be made */
 892        rt = nrt ? : net->ipv6.ip6_null_entry;
 893
 894        dst_hold(&rt->dst);
 895        if (nrt) {
 896                err = ip6_ins_rt(nrt);
 897                if (!err)
 898                        goto out2;
 899        }
 900
 901        if (--attempts <= 0)
 902                goto out2;
 903
 904        /*
 905         * Race condition! In the gap, when table->tb6_lock was
 906         * released someone could insert this route.  Relookup.
 907         */
 908        dst_release(&rt->dst);
 909        goto relookup;
 910
 911out:
            /*
             * If this pass still required RT6_LOOKUP_F_REACHABLE, drop the
             * requirement and redo the lookup once from the top of the table.
             */
 912        if (reachable) {
 913                reachable = 0;
 914                goto restart_2;
 915        }
 916        dst_hold(&rt->dst);
 917        read_unlock_bh(&table->tb6_lock);
 918out2:
 919        rt->dst.lastuse = jiffies;
 920        rt->dst.__use++;
 921
 922        return rt;
 923}
 924
 925static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
 926                                            struct flowi6 *fl6, int flags)
 927{
 928        return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 929}
 930
 931static struct dst_entry *ip6_route_input_lookup(struct net *net,
 932                                                struct net_device *dev,
 933                                                struct flowi6 *fl6, int flags)
 934{
 935        if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 936                flags |= RT6_LOOKUP_F_IFACE;
 937
 938        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 939}
 940
 941void ip6_route_input(struct sk_buff *skb)
 942{
 943        const struct ipv6hdr *iph = ipv6_hdr(skb);
 944        struct net *net = dev_net(skb->dev);
 945        int flags = RT6_LOOKUP_F_HAS_SADDR;
 946        struct flowi6 fl6 = {
 947                .flowi6_iif = skb->dev->ifindex,
 948                .daddr = iph->daddr,
 949                .saddr = iph->saddr,
 950                .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
 951                .flowi6_mark = skb->mark,
 952                .flowi6_proto = iph->nexthdr,
 953        };
 954
 955        skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
 956}
 957
 958static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
 959                                             struct flowi6 *fl6, int flags)
 960{
 961        return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 962}
 963
 964struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 965                                    struct flowi6 *fl6)
 966{
 967        int flags = 0;
 968
 969        fl6->flowi6_iif = net->loopback_dev->ifindex;
 970
 971        if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 972                flags |= RT6_LOOKUP_F_IFACE;
 973
 974        if (!ipv6_addr_any(&fl6->saddr))
 975                flags |= RT6_LOOKUP_F_HAS_SADDR;
 976        else if (sk)
 977                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 978
 979        return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 980}
 981
 982EXPORT_SYMBOL(ip6_route_output);
 983
 984struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 985{
 986        struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
 987        struct dst_entry *new = NULL;
 988
 989        rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
 990        if (rt) {
 991                new = &rt->dst;
 992
 993                memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
 994                rt6_init_peer(rt, net->ipv6.peers);
 995
 996                new->__use = 1;
 997                new->input = dst_discard;
 998                new->output = dst_discard;
 999
1000                if (dst_metrics_read_only(&ort->dst))
1001                        new->_metrics = ort->dst._metrics;
1002                else
1003                        dst_copy_metrics(new, &ort->dst);
1004                rt->rt6i_idev = ort->rt6i_idev;
1005                if (rt->rt6i_idev)
1006                        in6_dev_hold(rt->rt6i_idev);
1007
1008                rt->rt6i_gateway = ort->rt6i_gateway;
1009                rt->rt6i_flags = ort->rt6i_flags;
1010                rt6_clean_expires(rt);
1011                rt->rt6i_metric = 0;
1012
1013                memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1014#ifdef CONFIG_IPV6_SUBTREES
1015                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1016#endif
1017
1018                dst_free(new);
1019        }
1020
1021        dst_release(dst_orig);
1022        return new ? new : ERR_PTR(-ENOMEM);
1023}
1024
1025/*
1026 *      Destination cache support functions
1027 */
1028
1029static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1030{
1031        struct rt6_info *rt;
1032
1033        rt = (struct rt6_info *) dst;
1034
1035        /* All IPV6 dsts are created with ->obsolete set to the value
1036         * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1037         * into this function always.
1038         */
1039        if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1040                return NULL;
1041
1042        if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1043                if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1044                        if (!rt6_has_peer(rt))
1045                                rt6_bind_peer(rt, 0);
1046                        rt->rt6i_peer_genid = rt6_peer_genid();
1047                }
1048                return dst;
1049        }
1050        return NULL;
1051}
1052
1053static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1054{
1055        struct rt6_info *rt = (struct rt6_info *) dst;
1056
1057        if (rt) {
1058                if (rt->rt6i_flags & RTF_CACHE) {
1059                        if (rt6_check_expired(rt)) {
1060                                ip6_del_rt(rt);
1061                                dst = NULL;
1062                        }
1063                } else {
1064                        dst_release(dst);
1065                        dst = NULL;
1066                }
1067        }
1068        return dst;
1069}
1070
1071static void ip6_link_failure(struct sk_buff *skb)
1072{
1073        struct rt6_info *rt;
1074
1075        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1076
1077        rt = (struct rt6_info *) skb_dst(skb);
1078        if (rt) {
1079                if (rt->rt6i_flags & RTF_CACHE)
1080                        rt6_update_expires(rt, 0);
1081                else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1082                        rt->rt6i_node->fn_sernum = -1;
1083        }
1084}
1085
1086static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1087                               struct sk_buff *skb, u32 mtu)
1088{
1089        struct rt6_info *rt6 = (struct rt6_info*)dst;
1090
1091        dst_confirm(dst);
1092        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1093                struct net *net = dev_net(dst->dev);
1094
1095                rt6->rt6i_flags |= RTF_MODIFIED;
1096                if (mtu < IPV6_MIN_MTU) {
1097                        u32 features = dst_metric(dst, RTAX_FEATURES);
1098                        mtu = IPV6_MIN_MTU;
1099                        features |= RTAX_FEATURE_ALLFRAG;
1100                        dst_metric_set(dst, RTAX_FEATURES, features);
1101                }
1102                dst_metric_set(dst, RTAX_MTU, mtu);
1103                rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1104        }
1105}
1106
1107void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1108                     int oif, u32 mark)
1109{
1110        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1111        struct dst_entry *dst;
1112        struct flowi6 fl6;
1113
1114        memset(&fl6, 0, sizeof(fl6));
1115        fl6.flowi6_oif = oif;
1116        fl6.flowi6_mark = mark;
1117        fl6.flowi6_flags = 0;
1118        fl6.daddr = iph->daddr;
1119        fl6.saddr = iph->saddr;
1120        fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1121
1122        dst = ip6_route_output(net, NULL, &fl6);
1123        if (!dst->error)
1124                ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1125        dst_release(dst);
1126}
1127EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1128
1129void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1130{
1131        ip6_update_pmtu(skb, sock_net(sk), mtu,
1132                        sk->sk_bound_dev_if, sk->sk_mark);
1133}
1134EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1135
1136void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1137{
1138        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1139        struct dst_entry *dst;
1140        struct flowi6 fl6;
1141
1142        memset(&fl6, 0, sizeof(fl6));
1143        fl6.flowi6_oif = oif;
1144        fl6.flowi6_mark = mark;
1145        fl6.flowi6_flags = 0;
1146        fl6.daddr = iph->daddr;
1147        fl6.saddr = iph->saddr;
1148        fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1149
1150        dst = ip6_route_output(net, NULL, &fl6);
1151        if (!dst->error)
1152                rt6_do_redirect(dst, NULL, skb);
1153        dst_release(dst);
1154}
1155EXPORT_SYMBOL_GPL(ip6_redirect);
1156
1157void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1158{
1159        ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1160}
1161EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1162
1163static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1164{
1165        struct net_device *dev = dst->dev;
1166        unsigned int mtu = dst_mtu(dst);
1167        struct net *net = dev_net(dev);
1168
1169        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1170
1171        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1172                mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1173
1174        /*
1175         * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1176         * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1177         * IPV6_MAXPLEN is also valid and means: "any MSS,
1178         * rely only on pmtu discovery"
1179         */
1180        if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1181                mtu = IPV6_MAXPLEN;
1182        return mtu;
1183}
1184
1185static unsigned int ip6_mtu(const struct dst_entry *dst)
1186{
1187        struct inet6_dev *idev;
1188        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1189
1190        if (mtu)
1191                return mtu;
1192
1193        mtu = IPV6_MIN_MTU;
1194
1195        rcu_read_lock();
1196        idev = __in6_dev_get(dst->dev);
1197        if (idev)
1198                mtu = idev->cnf.mtu6;
1199        rcu_read_unlock();
1200
1201        return mtu;
1202}
1203
/*
 * Singly linked list (chained through dst->next) of the dst entries created
 * by icmp6_dst_alloc().  icmp6_dst_lock is held around every insertion into
 * and every walk of this list in this file.
 */
1204static struct dst_entry *icmp6_dst_gc_list;
1205static DEFINE_SPINLOCK(icmp6_dst_lock);
1206
1207struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1208                                  struct neighbour *neigh,
1209                                  struct flowi6 *fl6)
1210{
1211        struct dst_entry *dst;
1212        struct rt6_info *rt;
1213        struct inet6_dev *idev = in6_dev_get(dev);
1214        struct net *net = dev_net(dev);
1215
1216        if (unlikely(!idev))
1217                return ERR_PTR(-ENODEV);
1218
1219        rt = ip6_dst_alloc(net, dev, 0, NULL);
1220        if (unlikely(!rt)) {
1221                in6_dev_put(idev);
1222                dst = ERR_PTR(-ENOMEM);
1223                goto out;
1224        }
1225
1226        if (neigh)
1227                neigh_hold(neigh);
1228        else {
1229                neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1230                if (IS_ERR(neigh)) {
1231                        in6_dev_put(idev);
1232                        dst_free(&rt->dst);
1233                        return ERR_CAST(neigh);
1234                }
1235        }
1236
1237        rt->dst.flags |= DST_HOST;
1238        rt->dst.output  = ip6_output;
1239        rt->n = neigh;
1240        atomic_set(&rt->dst.__refcnt, 1);
1241        rt->rt6i_dst.addr = fl6->daddr;
1242        rt->rt6i_dst.plen = 128;
1243        rt->rt6i_idev     = idev;
1244        dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1245
1246        spin_lock_bh(&icmp6_dst_lock);
1247        rt->dst.next = icmp6_dst_gc_list;
1248        icmp6_dst_gc_list = &rt->dst;
1249        spin_unlock_bh(&icmp6_dst_lock);
1250
1251        fib6_force_start_gc(net);
1252
1253        dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1254
1255out:
1256        return dst;
1257}
1258
1259int icmp6_dst_gc(void)
1260{
1261        struct dst_entry *dst, **pprev;
1262        int more = 0;
1263
1264        spin_lock_bh(&icmp6_dst_lock);
1265        pprev = &icmp6_dst_gc_list;
1266
1267        while ((dst = *pprev) != NULL) {
1268                if (!atomic_read(&dst->__refcnt)) {
1269                        *pprev = dst->next;
1270                        dst_free(dst);
1271                } else {
1272                        pprev = &dst->next;
1273                        ++more;
1274                }
1275        }
1276
1277        spin_unlock_bh(&icmp6_dst_lock);
1278
1279        return more;
1280}
1281
1282static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1283                            void *arg)
1284{
1285        struct dst_entry *dst, **pprev;
1286
1287        spin_lock_bh(&icmp6_dst_lock);
1288        pprev = &icmp6_dst_gc_list;
1289        while ((dst = *pprev) != NULL) {
1290                struct rt6_info *rt = (struct rt6_info *) dst;
1291                if (func(rt, arg)) {
1292                        *pprev = dst->next;
1293                        dst_free(dst);
1294                } else {
1295                        pprev = &dst->next;
1296                }
1297        }
1298        spin_unlock_bh(&icmp6_dst_lock);
1299}
1300
1301static int ip6_dst_gc(struct dst_ops *ops)
1302{
1303        unsigned long now = jiffies;
1304        struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1305        int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1306        int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1307        int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1308        int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1309        unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1310        int entries;
1311
1312        entries = dst_entries_get_fast(ops);
1313        if (time_after(rt_last_gc + rt_min_interval, now) &&
1314            entries <= rt_max_size)
1315                goto out;
1316
1317        net->ipv6.ip6_rt_gc_expire++;
1318        fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1319        net->ipv6.ip6_rt_last_gc = now;
1320        entries = dst_entries_get_slow(ops);
1321        if (entries < ops->gc_thresh)
1322                net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1323out:
1324        net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1325        return entries > rt_max_size;
1326}
1327
1328/* Clean host part of a prefix. Not necessary in radix tree,
1329   but results in cleaner routing tables.
1330
1331   Remove it only when all the things will work!
1332 */
1333
1334int ip6_dst_hoplimit(struct dst_entry *dst)
1335{
1336        int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1337        if (hoplimit == 0) {
1338                struct net_device *dev = dst->dev;
1339                struct inet6_dev *idev;
1340
1341                rcu_read_lock();
1342                idev = __in6_dev_get(dev);
1343                if (idev)
1344                        hoplimit = idev->cnf.hop_limit;
1345                else
1346                        hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1347                rcu_read_unlock();
1348        }
1349        return hoplimit;
1350}
1351EXPORT_SYMBOL(ip6_dst_hoplimit);
1352
1353/*
1354 *
1355 */
1356
/*
 * ip6_route_add - build a route from @cfg and insert it into its FIB table.
 *
 * Steps, in order: validate the prefix lengths; take references on the
 * optional output device (fc_ifindex) and its inet6_dev; find or create the
 * table; allocate the rt6_info and fill it from @cfg.
 *
 * RTF_REJECT routes, and non-RTF_LOCAL routes through a loopback device to a
 * non-loopback destination, become reject routes bound to the namespace's
 * loopback device.  For RTF_GATEWAY routes whose gateway is not a link-local
 * unicast address, the gateway must be a unicast address that is itself
 * reachable (rt6_lookup()) through a route that is not RTF_GATEWAY.
 *
 * Side effects on @cfg: fc_metric is defaulted to IP6_RT_PRIO_USER when 0,
 * fc_protocol to RTPROT_BOOT when RTPROT_UNSPEC, and fc_nlinfo.nl_net is
 * overwritten with the netns of the final device.
 *
 * Returns the result of __ip6_ins_rt() once the route is fully built, or a
 * negative errno set in this function (-EINVAL, -ENODEV, -ENOBUFS, -ENOMEM,
 * -EHOSTUNREACH) or returned by rt6_bind_neighbour().  On those error paths
 * the dev/idev references held at that point are dropped and the
 * half-built route is passed to dst_free().
 */
1357int ip6_route_add(struct fib6_config *cfg)
1358{
1359        int err;
1360        struct net *net = cfg->fc_nlinfo.nl_net;
1361        struct rt6_info *rt = NULL;
1362        struct net_device *dev = NULL;
1363        struct inet6_dev *idev = NULL;
1364        struct fib6_table *table;
1365        int addr_type;
1366
1367        if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1368                return -EINVAL;
1369#ifndef CONFIG_IPV6_SUBTREES
1370        if (cfg->fc_src_len)
1371                return -EINVAL;
1372#endif
            /* Explicit output device: hold both the device and its inet6_dev. */
1373        if (cfg->fc_ifindex) {
1374                err = -ENODEV;
1375                dev = dev_get_by_index(net, cfg->fc_ifindex);
1376                if (!dev)
1377                        goto out;
1378                idev = in6_dev_get(dev);
1379                if (!idev)
1380                        goto out;
1381        }
1382
1383        if (cfg->fc_metric == 0)
1384                cfg->fc_metric = IP6_RT_PRIO_USER;
1385
            /*
             * A netlink request without NLM_F_CREATE first tries the existing
             * table; a missing table is still created, after a warning.
             */
1386        err = -ENOBUFS;
1387        if (cfg->fc_nlinfo.nlh &&
1388            !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1389                table = fib6_get_table(net, cfg->fc_table);
1390                if (!table) {
1391                        pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1392                        table = fib6_new_table(net, cfg->fc_table);
1393                }
1394        } else {
1395                table = fib6_new_table(net, cfg->fc_table);
1396        }
1397
1398        if (!table)
1399                goto out;
1400
1401        rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1402
1403        if (!rt) {
1404                err = -ENOMEM;
1405                goto out;
1406        }
1407
1408        if (cfg->fc_flags & RTF_EXPIRES)
1409                rt6_set_expires(rt, jiffies +
1410                                clock_t_to_jiffies(cfg->fc_expires));
1411        else
1412                rt6_clean_expires(rt);
1413
1414        if (cfg->fc_protocol == RTPROT_UNSPEC)
1415                cfg->fc_protocol = RTPROT_BOOT;
1416        rt->rt6i_protocol = cfg->fc_protocol;
1417
1418        addr_type = ipv6_addr_type(&cfg->fc_dst);
1419
            /* Input handler depends on the destination type / RTF_LOCAL. */
1420        if (addr_type & IPV6_ADDR_MULTICAST)
1421                rt->dst.input = ip6_mc_input;
1422        else if (cfg->fc_flags & RTF_LOCAL)
1423                rt->dst.input = ip6_input;
1424        else
1425                rt->dst.input = ip6_forward;
1426
1427        rt->dst.output = ip6_output;
1428
1429        ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1430        rt->rt6i_dst.plen = cfg->fc_dst_len;
1431        if (rt->rt6i_dst.plen == 128)
1432               rt->dst.flags |= DST_HOST;
1433
            /* Non-host routes carrying metrics get a zeroed metrics array here. */
1434        if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1435                u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1436                if (!metrics) {
1437                        err = -ENOMEM;
1438                        goto out;
1439                }
1440                dst_init_metrics(&rt->dst, metrics, 0);
1441        }
1442#ifdef CONFIG_IPV6_SUBTREES
1443        ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1444        rt->rt6i_src.plen = cfg->fc_src_len;
1445#endif
1446
1447        rt->rt6i_metric = cfg->fc_metric;
1448
1449        /* We cannot add true routes via loopback here,
1450           they would result in kernel looping; promote them to reject routes
1451         */
1452        if ((cfg->fc_flags & RTF_REJECT) ||
1453            (dev && (dev->flags & IFF_LOOPBACK) &&
1454             !(addr_type & IPV6_ADDR_LOOPBACK) &&
1455             !(cfg->fc_flags & RTF_LOCAL))) {
1456                /* hold loopback dev/idev if we haven't done so. */
1457                if (dev != net->loopback_dev) {
1458                        if (dev) {
1459                                dev_put(dev);
1460                                in6_dev_put(idev);
1461                        }
1462                        dev = net->loopback_dev;
1463                        dev_hold(dev);
1464                        idev = in6_dev_get(dev);
1465                        if (!idev) {
1466                                err = -ENODEV;
1467                                goto out;
1468                        }
1469                }
1470                rt->dst.output = ip6_pkt_discard_out;
1471                rt->dst.input = ip6_pkt_discard;
1472                rt->dst.error = -ENETUNREACH;
1473                rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1474                goto install_route;
1475        }
1476
1477        if (cfg->fc_flags & RTF_GATEWAY) {
1478                const struct in6_addr *gw_addr;
1479                int gwa_type;
1480
1481                gw_addr = &cfg->fc_gateway;
1482                rt->rt6i_gateway = *gw_addr;
1483                gwa_type = ipv6_addr_type(gw_addr);
1484
1485                if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1486                        struct rt6_info *grt;
1487
1488                        /* IPv6 strictly inhibits using not link-local
1489                           addresses as nexthop address.
1490                           Otherwise, router will not able to send redirects.
1491                           It is very good, but in some (rare!) circumstances
1492                           (SIT, PtP, NBMA NOARP links) it is handy to allow
1493                           some exceptions. --ANK
1494                         */
1495                        err = -EINVAL;
1496                        if (!(gwa_type & IPV6_ADDR_UNICAST))
1497                                goto out;
1498
1499                        grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1500
1501                        err = -EHOSTUNREACH;
1502                        if (!grt)
1503                                goto out;
1504                        if (dev) {
1505                                if (dev != grt->dst.dev) {
1506                                        dst_release(&grt->dst);
1507                                        goto out;
1508                                }
1509                        } else {
                                    /* No device given: adopt the one the gateway route uses. */
1510                                dev = grt->dst.dev;
1511                                idev = grt->rt6i_idev;
1512                                dev_hold(dev);
1513                                in6_dev_hold(grt->rt6i_idev);
1514                        }
                            /* err stays -EHOSTUNREACH if the gateway is itself behind a gateway. */
1515                        if (!(grt->rt6i_flags & RTF_GATEWAY))
1516                                err = 0;
1517                        dst_release(&grt->dst);
1518
1519                        if (err)
1520                                goto out;
1521                }
1522                err = -EINVAL;
1523                if (!dev || (dev->flags & IFF_LOOPBACK))
1524                        goto out;
1525        }
1526
1527        err = -ENODEV;
1528        if (!dev)
1529                goto out;
1530
1531        if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1532                if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1533                        err = -EINVAL;
1534                        goto out;
1535                }
1536                rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1537                rt->rt6i_prefsrc.plen = 128;
1538        } else
1539                rt->rt6i_prefsrc.plen = 0;
1540
1541        if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1542                err = rt6_bind_neighbour(rt, dev);
1543                if (err)
1544                        goto out;
1545        }
1546
1547        rt->rt6i_flags = cfg->fc_flags;
1548
1549install_route:
            /* Apply the metrics supplied as nested netlink attributes in fc_mx. */
1550        if (cfg->fc_mx) {
1551                struct nlattr *nla;
1552                int remaining;
1553
1554                nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1555                        int type = nla_type(nla);
1556
1557                        if (type) {
1558                                if (type > RTAX_MAX) {
1559                                        err = -EINVAL;
1560                                        goto out;
1561                                }
1562
1563                                dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1564                        }
1565                }
1566        }
1567
            /* The dev/idev pointers (and the references held on them) are stored in the route. */
1568        rt->dst.dev = dev;
1569        rt->rt6i_idev = idev;
1570        rt->rt6i_table = table;
1571
1572        cfg->fc_nlinfo.nl_net = dev_net(dev);
1573
1574        return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1575
1576out:
            /* Error exit: drop whatever references are held and free the route. */
1577        if (dev)
1578                dev_put(dev);
1579        if (idev)
1580                in6_dev_put(idev);
1581        if (rt)
1582                dst_free(&rt->dst);
1583        return err;
1584}
1585
1586static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1587{
1588        int err;
1589        struct fib6_table *table;
1590        struct net *net = dev_net(rt->dst.dev);
1591
1592        if (rt == net->ipv6.ip6_null_entry) {
1593                err = -ENOENT;
1594                goto out;
1595        }
1596
1597        table = rt->rt6i_table;
1598        write_lock_bh(&table->tb6_lock);
1599        err = fib6_del(rt, info);
1600        write_unlock_bh(&table->tb6_lock);
1601
1602out:
1603        dst_release(&rt->dst);
1604        return err;
1605}
1606
1607int ip6_del_rt(struct rt6_info *rt)
1608{
1609        struct nl_info info = {
1610                .nl_net = dev_net(rt->dst.dev),
1611        };
1612        return __ip6_del_rt(rt, &info);
1613}
1614
1615static int ip6_route_del(struct fib6_config *cfg)
1616{
1617        struct fib6_table *table;
1618        struct fib6_node *fn;
1619        struct rt6_info *rt;
1620        int err = -ESRCH;
1621
1622        table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1623        if (!table)
1624                return err;
1625
1626        read_lock_bh(&table->tb6_lock);
1627
1628        fn = fib6_locate(&table->tb6_root,
1629                         &cfg->fc_dst, cfg->fc_dst_len,
1630                         &cfg->fc_src, cfg->fc_src_len);
1631
1632        if (fn) {
1633                for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1634                        if (cfg->fc_ifindex &&
1635                            (!rt->dst.dev ||
1636                             rt->dst.dev->ifindex != cfg->fc_ifindex))
1637                                continue;
1638                        if (cfg->fc_flags & RTF_GATEWAY &&
1639                            !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1640                                continue;
1641                        if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1642                                continue;
1643                        dst_hold(&rt->dst);
1644                        read_unlock_bh(&table->tb6_lock);
1645
1646                        return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1647                }
1648        }
1649        read_unlock_bh(&table->tb6_lock);
1650
1651        return err;
1652}
1653
/*
 * rt6_do_redirect - process the ICMPv6 Redirect in @skb against route @dst.
 *
 * The target and destination addresses are read from directly behind the
 * ICMPv6 header.  The message is dropped when it is too short, when the
 * destination is multicast, when the target is neither equal to the
 * destination (on-link case) nor a link-local unicast address, when the
 * receiving device has no inet6_dev, forwards, or has accept_redirects off,
 * when the ND options do not parse, or when @dst is ip6_null_entry.
 *
 * Otherwise @dst is confirmed, the neighbour entry for the target is looked
 * up/created and updated, and a copy of the route for the destination is
 * inserted with RTF_UP|RTF_DYNAMIC|RTF_CACHE (plus RTF_GATEWAY unless
 * on-link) pointing at that neighbour.  NETEVENT_REDIRECT notifiers are
 * called, and the old route is deleted if it was itself an RTF_CACHE entry.
 *
 * @sk is not used here.
 *
 * NOTE(review): several debug messages below are prefixed "rt6_redirect:"
 * rather than with this function's name.
 */
1654static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1655{
1656        struct net *net = dev_net(skb->dev);
1657        struct netevent_redirect netevent;
1658        struct rt6_info *rt, *nrt = NULL;
1659        const struct in6_addr *target;
1660        struct ndisc_options ndopts;
1661        const struct in6_addr *dest;
1662        struct neighbour *old_neigh;
1663        struct inet6_dev *in6_dev;
1664        struct neighbour *neigh;
1665        struct icmp6hdr *icmph;
1666        int optlen, on_link;
1667        u8 *lladdr;
1668
            /* Bytes left for options after the ICMPv6 header and the two addresses. */
1669        optlen = skb->tail - skb->transport_header;
1670        optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1671
1672        if (optlen < 0) {
1673                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1674                return;
1675        }
1676
1677        icmph = icmp6_hdr(skb);
1678        target = (const struct in6_addr *) (icmph + 1);
1679        dest = target + 1;
1680
1681        if (ipv6_addr_is_multicast(dest)) {
1682                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1683                return;
1684        }
1685
            /*
             * target == destination: the destination itself is on-link.
             * Otherwise the target must be a link-local unicast address.
             */
1686        on_link = 0;
1687        if (ipv6_addr_equal(dest, target)) {
1688                on_link = 1;
1689        } else if (ipv6_addr_type(target) !=
1690                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1691                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1692                return;
1693        }
1694
1695        in6_dev = __in6_dev_get(skb->dev);
1696        if (!in6_dev)
1697                return;
1698        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1699                return;
1700
1701        /* RFC2461 8.1:
1702         *      The IP source address of the Redirect MUST be the same as the current
1703         *      first-hop router for the specified ICMP Destination Address.
1704         */
1705
1706        if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1707                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1708                return;
1709        }
1710
1711        lladdr = NULL;
1712        if (ndopts.nd_opts_tgt_lladdr) {
1713                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1714                                             skb->dev);
1715                if (!lladdr) {
1716                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1717                        return;
1718                }
1719        }
1720
1721        rt = (struct rt6_info *) dst;
1722        if (rt == net->ipv6.ip6_null_entry) {
1723                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1724                return;
1725        }
1726
1727        /* Redirect received -> path was valid.
1728         * Look, redirects are sent only in response to data packets,
1729         * so that this nexthop apparently is reachable. --ANK
1730         */
1731        dst_confirm(&rt->dst);
1732
            /* Reference obtained here is released at "out". */
1733        neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1734        if (!neigh)
1735                return;
1736
1737        /* Duplicate redirect: silently ignore. */
1738        old_neigh = rt->n;
1739        if (neigh == old_neigh)
1740                goto out;
1741
1742        /*
1743         *      We have finally decided to accept it.
1744         */
1745
1746        neigh_update(neigh, lladdr, NUD_STALE,
1747                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1748                     NEIGH_UPDATE_F_OVERRIDE|
1749                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1750                                     NEIGH_UPDATE_F_ISROUTER))
1751                     );
1752
1753        nrt = ip6_rt_copy(rt, dest);
1754        if (!nrt)
1755                goto out;
1756
1757        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1758        if (on_link)
1759                nrt->rt6i_flags &= ~RTF_GATEWAY;
1760
1761        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1762        nrt->n = neigh_clone(neigh);
1763
1764        if (ip6_ins_rt(nrt))
1765                goto out;
1766
1767        netevent.old = &rt->dst;
1768        netevent.old_neigh = old_neigh;
1769        netevent.new = &nrt->dst;
1770        netevent.new_neigh = neigh;
1771        netevent.daddr = dest;
1772        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1773
            /*
             * The old route was a cache entry: delete it.  ip6_del_rt()
             * releases one reference, hence the dst_clone() first.
             */
1774        if (rt->rt6i_flags & RTF_CACHE) {
1775                rt = (struct rt6_info *) dst_clone(&rt->dst);
1776                ip6_del_rt(rt);
1777        }
1778
1779out:
1780        neigh_release(neigh);
1781}
1782
1783/*
1784 *      Misc support functions
1785 */
1786
1787static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1788                                    const struct in6_addr *dest)
1789{
1790        struct net *net = dev_net(ort->dst.dev);
1791        struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1792                                            ort->rt6i_table);
1793
1794        if (rt) {
1795                rt->dst.input = ort->dst.input;
1796                rt->dst.output = ort->dst.output;
1797                rt->dst.flags |= DST_HOST;
1798
1799                rt->rt6i_dst.addr = *dest;
1800                rt->rt6i_dst.plen = 128;
1801                dst_copy_metrics(&rt->dst, &ort->dst);
1802                rt->dst.error = ort->dst.error;
1803                rt->rt6i_idev = ort->rt6i_idev;
1804                if (rt->rt6i_idev)
1805                        in6_dev_hold(rt->rt6i_idev);
1806                rt->dst.lastuse = jiffies;
1807
1808                rt->rt6i_gateway = ort->rt6i_gateway;
1809                rt->rt6i_flags = ort->rt6i_flags;
1810                if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1811                    (RTF_DEFAULT | RTF_ADDRCONF))
1812                        rt6_set_from(rt, ort);
1813                else
1814                        rt6_clean_expires(rt);
1815                rt->rt6i_metric = 0;
1816
1817#ifdef CONFIG_IPV6_SUBTREES
1818                memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1819#endif
1820                memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1821                rt->rt6i_table = ort->rt6i_table;
1822        }
1823        return rt;
1824}
1825
1826#ifdef CONFIG_IPV6_ROUTE_INFO
1827static struct rt6_info *rt6_get_route_info(struct net *net,
1828                                           const struct in6_addr *prefix, int prefixlen,
1829                                           const struct in6_addr *gwaddr, int ifindex)
1830{
1831        struct fib6_node *fn;
1832        struct rt6_info *rt = NULL;
1833        struct fib6_table *table;
1834
1835        table = fib6_get_table(net, RT6_TABLE_INFO);
1836        if (!table)
1837                return NULL;
1838
1839        write_lock_bh(&table->tb6_lock);
1840        fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1841        if (!fn)
1842                goto out;
1843
1844        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1845                if (rt->dst.dev->ifindex != ifindex)
1846                        continue;
1847                if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1848                        continue;
1849                if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1850                        continue;
1851                dst_hold(&rt->dst);
1852                break;
1853        }
1854out:
1855        write_unlock_bh(&table->tb6_lock);
1856        return rt;
1857}
1858
1859static struct rt6_info *rt6_add_route_info(struct net *net,
1860                                           const struct in6_addr *prefix, int prefixlen,
1861                                           const struct in6_addr *gwaddr, int ifindex,
1862                                           unsigned int pref)
1863{
1864        struct fib6_config cfg = {
1865                .fc_table       = RT6_TABLE_INFO,
1866                .fc_metric      = IP6_RT_PRIO_USER,
1867                .fc_ifindex     = ifindex,
1868                .fc_dst_len     = prefixlen,
1869                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1870                                  RTF_UP | RTF_PREF(pref),
1871                .fc_nlinfo.pid = 0,
1872                .fc_nlinfo.nlh = NULL,
1873                .fc_nlinfo.nl_net = net,
1874        };
1875
1876        cfg.fc_dst = *prefix;
1877        cfg.fc_gateway = *gwaddr;
1878
1879        /* We should treat it as a default route if prefix length is 0. */
1880        if (!prefixlen)
1881                cfg.fc_flags |= RTF_DEFAULT;
1882
1883        ip6_route_add(&cfg);
1884
1885        return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1886}
1887#endif
1888
1889struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1890{
1891        struct rt6_info *rt;
1892        struct fib6_table *table;
1893
1894        table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1895        if (!table)
1896                return NULL;
1897
1898        write_lock_bh(&table->tb6_lock);
1899        for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1900                if (dev == rt->dst.dev &&
1901                    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1902                    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1903                        break;
1904        }
1905        if (rt)
1906                dst_hold(&rt->dst);
1907        write_unlock_bh(&table->tb6_lock);
1908        return rt;
1909}
1910
1911struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1912                                     struct net_device *dev,
1913                                     unsigned int pref)
1914{
1915        struct fib6_config cfg = {
1916                .fc_table       = RT6_TABLE_DFLT,
1917                .fc_metric      = IP6_RT_PRIO_USER,
1918                .fc_ifindex     = dev->ifindex,
1919                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1920                                  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1921                .fc_nlinfo.pid = 0,
1922                .fc_nlinfo.nlh = NULL,
1923                .fc_nlinfo.nl_net = dev_net(dev),
1924        };
1925
1926        cfg.fc_gateway = *gwaddr;
1927
1928        ip6_route_add(&cfg);
1929
1930        return rt6_get_dflt_router(gwaddr, dev);
1931}
1932
1933void rt6_purge_dflt_routers(struct net *net)
1934{
1935        struct rt6_info *rt;
1936        struct fib6_table *table;
1937
1938        /* NOTE: Keep consistent with rt6_get_dflt_router */
1939        table = fib6_get_table(net, RT6_TABLE_DFLT);
1940        if (!table)
1941                return;
1942
1943restart:
1944        read_lock_bh(&table->tb6_lock);
1945        for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1946                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1947                        dst_hold(&rt->dst);
1948                        read_unlock_bh(&table->tb6_lock);
1949                        ip6_del_rt(rt);
1950                        goto restart;
1951                }
1952        }
1953        read_unlock_bh(&table->tb6_lock);
1954}
1955
1956static void rtmsg_to_fib6_config(struct net *net,
1957                                 struct in6_rtmsg *rtmsg,
1958                                 struct fib6_config *cfg)
1959{
1960        memset(cfg, 0, sizeof(*cfg));
1961
1962        cfg->fc_table = RT6_TABLE_MAIN;
1963        cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1964        cfg->fc_metric = rtmsg->rtmsg_metric;
1965        cfg->fc_expires = rtmsg->rtmsg_info;
1966        cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1967        cfg->fc_src_len = rtmsg->rtmsg_src_len;
1968        cfg->fc_flags = rtmsg->rtmsg_flags;
1969
1970        cfg->fc_nlinfo.nl_net = net;
1971
1972        cfg->fc_dst = rtmsg->rtmsg_dst;
1973        cfg->fc_src = rtmsg->rtmsg_src;
1974        cfg->fc_gateway = rtmsg->rtmsg_gateway;
1975}
1976
1977int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1978{
1979        struct fib6_config cfg;
1980        struct in6_rtmsg rtmsg;
1981        int err;
1982
1983        switch(cmd) {
1984        case SIOCADDRT:         /* Add a route */
1985        case SIOCDELRT:         /* Delete a route */
1986                if (!capable(CAP_NET_ADMIN))
1987                        return -EPERM;
1988                err = copy_from_user(&rtmsg, arg,
1989                                     sizeof(struct in6_rtmsg));
1990                if (err)
1991                        return -EFAULT;
1992
1993                rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1994
1995                rtnl_lock();
1996                switch (cmd) {
1997                case SIOCADDRT:
1998                        err = ip6_route_add(&cfg);
1999                        break;
2000                case SIOCDELRT:
2001                        err = ip6_route_del(&cfg);
2002                        break;
2003                default:
2004                        err = -EINVAL;
2005                }
2006                rtnl_unlock();
2007
2008                return err;
2009        }
2010
2011        return -EINVAL;
2012}
2013
2014/*
2015 *      Drop the packet on the floor
2016 */
2017
2018static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2019{
2020        int type;
2021        struct dst_entry *dst = skb_dst(skb);
2022        switch (ipstats_mib_noroutes) {
2023        case IPSTATS_MIB_INNOROUTES:
2024                type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2025                if (type == IPV6_ADDR_ANY) {
2026                        IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2027                                      IPSTATS_MIB_INADDRERRORS);
2028                        break;
2029                }
2030                /* FALLTHROUGH */
2031        case IPSTATS_MIB_OUTNOROUTES:
2032                IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2033                              ipstats_mib_noroutes);
2034                break;
2035        }
2036        icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2037        kfree_skb(skb);
2038        return 0;
2039}
2040
2041static int ip6_pkt_discard(struct sk_buff *skb)
2042{
2043        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2044}
2045
2046static int ip6_pkt_discard_out(struct sk_buff *skb)
2047{
2048        skb->dev = skb_dst(skb)->dev;
2049        return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2050}
2051
2052#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2053
2054static int ip6_pkt_prohibit(struct sk_buff *skb)
2055{
2056        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2057}
2058
2059static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2060{
2061        skb->dev = skb_dst(skb)->dev;
2062        return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2063}
2064
2065#endif
2066
2067/*
2068 *      Allocate a dst for local (unicast / anycast) address.
2069 */
2070
2071struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2072                                    const struct in6_addr *addr,
2073                                    bool anycast)
2074{
2075        struct net *net = dev_net(idev->dev);
2076        struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2077        int err;
2078
2079        if (!rt) {
2080                net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2081                return ERR_PTR(-ENOMEM);
2082        }
2083
2084        in6_dev_hold(idev);
2085
2086        rt->dst.flags |= DST_HOST;
2087        rt->dst.input = ip6_input;
2088        rt->dst.output = ip6_output;
2089        rt->rt6i_idev = idev;
2090
2091        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2092        if (anycast)
2093                rt->rt6i_flags |= RTF_ANYCAST;
2094        else
2095                rt->rt6i_flags |= RTF_LOCAL;
2096        err = rt6_bind_neighbour(rt, rt->dst.dev);
2097        if (err) {
2098                dst_free(&rt->dst);
2099                return ERR_PTR(err);
2100        }
2101
2102        rt->rt6i_dst.addr = *addr;
2103        rt->rt6i_dst.plen = 128;
2104        rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2105
2106        atomic_set(&rt->dst.__refcnt, 1);
2107
2108        return rt;
2109}
2110
2111int ip6_route_get_saddr(struct net *net,
2112                        struct rt6_info *rt,
2113                        const struct in6_addr *daddr,
2114                        unsigned int prefs,
2115                        struct in6_addr *saddr)
2116{
2117        struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2118        int err = 0;
2119        if (rt->rt6i_prefsrc.plen)
2120                *saddr = rt->rt6i_prefsrc.addr;
2121        else
2122                err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2123                                         daddr, prefs, saddr);
2124        return err;
2125}
2126
2127/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with each route's prefsrc */
};
2133
2134static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2135{
2136        struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2137        struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2138        struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2139
2140        if (((void *)rt->dst.dev == dev || !dev) &&
2141            rt != net->ipv6.ip6_null_entry &&
2142            ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2143                /* remove prefsrc entry */
2144                rt->rt6i_prefsrc.plen = 0;
2145        }
2146        return 0;
2147}
2148
2149void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2150{
2151        struct net *net = dev_net(ifp->idev->dev);
2152        struct arg_dev_net_ip adni = {
2153                .dev = ifp->idev->dev,
2154                .net = net,
2155                .addr = &ifp->addr,
2156        };
2157        fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2158}
2159
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
};
2164
2165static int fib6_ifdown(struct rt6_info *rt, void *arg)
2166{
2167        const struct arg_dev_net *adn = arg;
2168        const struct net_device *dev = adn->dev;
2169
2170        if ((rt->dst.dev == dev || !dev) &&
2171            rt != adn->net->ipv6.ip6_null_entry)
2172                return -1;
2173
2174        return 0;
2175}
2176
2177void rt6_ifdown(struct net *net, struct net_device *dev)
2178{
2179        struct arg_dev_net adn = {
2180                .dev = dev,
2181                .net = net,
2182        };
2183
2184        fib6_clean_all(net, fib6_ifdown, 0, &adn);
2185        icmp6_clean_all(fib6_ifdown, &adn);
2186}
2187
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
2192
2193static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2194{
2195        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2196        struct inet6_dev *idev;
2197
2198        /* In IPv6 pmtu discovery is not optional,
2199           so that RTAX_MTU lock cannot disable it.
2200           We still use this lock to block changes
2201           caused by addrconf/ndisc.
2202        */
2203
2204        idev = __in6_dev_get(arg->dev);
2205        if (!idev)
2206                return 0;
2207
2208        /* For administrative MTU increase, there is no way to discover
2209           IPv6 PMTU increase, so PMTU increase should be updated here.
2210           Since RFC 1981 doesn't include administrative MTU increase
2211           update PMTU increase is a MUST. (i.e. jumbo frame)
2212         */
2213        /*
2214           If new MTU is less than route PMTU, this new MTU will be the
2215           lowest MTU in the path, update the route PMTU to reflect PMTU
2216           decreases; if new MTU is greater than route PMTU, and the
2217           old MTU is the lowest MTU in the path, update the route PMTU
2218           to reflect the increase. In this case if the other nodes' MTU
2219           also have the lowest MTU, TOO BIG MESSAGE will be lead to
2220           PMTU discouvery.
2221         */
2222        if (rt->dst.dev == arg->dev &&
2223            !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2224            (dst_mtu(&rt->dst) >= arg->mtu ||
2225             (dst_mtu(&rt->dst) < arg->mtu &&
2226              dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2227                dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2228        }
2229        return 0;
2230}
2231
2232void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2233{
2234        struct rt6_mtu_change_arg arg = {
2235                .dev = dev,
2236                .mtu = mtu,
2237        };
2238
2239        fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2240}
2241
2242static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2243        [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2244        [RTA_OIF]               = { .type = NLA_U32 },
2245        [RTA_IIF]               = { .type = NLA_U32 },
2246        [RTA_PRIORITY]          = { .type = NLA_U32 },
2247        [RTA_METRICS]           = { .type = NLA_NESTED },
2248};
2249
2250static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2251                              struct fib6_config *cfg)
2252{
2253        struct rtmsg *rtm;
2254        struct nlattr *tb[RTA_MAX+1];
2255        int err;
2256
2257        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2258        if (err < 0)
2259                goto errout;
2260
2261        err = -EINVAL;
2262        rtm = nlmsg_data(nlh);
2263        memset(cfg, 0, sizeof(*cfg));
2264
2265        cfg->fc_table = rtm->rtm_table;
2266        cfg->fc_dst_len = rtm->rtm_dst_len;
2267        cfg->fc_src_len = rtm->rtm_src_len;
2268        cfg->fc_flags = RTF_UP;
2269        cfg->fc_protocol = rtm->rtm_protocol;
2270
2271        if (rtm->rtm_type == RTN_UNREACHABLE)
2272                cfg->fc_flags |= RTF_REJECT;
2273
2274        if (rtm->rtm_type == RTN_LOCAL)
2275                cfg->fc_flags |= RTF_LOCAL;
2276
2277        cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2278        cfg->fc_nlinfo.nlh = nlh;
2279        cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2280
2281        if (tb[RTA_GATEWAY]) {
2282                nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2283                cfg->fc_flags |= RTF_GATEWAY;
2284        }
2285
2286        if (tb[RTA_DST]) {
2287                int plen = (rtm->rtm_dst_len + 7) >> 3;
2288
2289                if (nla_len(tb[RTA_DST]) < plen)
2290                        goto errout;
2291
2292                nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2293        }
2294
2295        if (tb[RTA_SRC]) {
2296                int plen = (rtm->rtm_src_len + 7) >> 3;
2297
2298                if (nla_len(tb[RTA_SRC]) < plen)
2299                        goto errout;
2300
2301                nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2302        }
2303
2304        if (tb[RTA_PREFSRC])
2305                nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2306
2307        if (tb[RTA_OIF])
2308                cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2309
2310        if (tb[RTA_PRIORITY])
2311                cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2312
2313        if (tb[RTA_METRICS]) {
2314                cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2315                cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2316        }
2317
2318        if (tb[RTA_TABLE])
2319                cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2320
2321        err = 0;
2322errout:
2323        return err;
2324}
2325
2326static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2327{
2328        struct fib6_config cfg;
2329        int err;
2330
2331        err = rtm_to_fib6_config(skb, nlh, &cfg);
2332        if (err < 0)
2333                return err;
2334
2335        return ip6_route_del(&cfg);
2336}
2337
2338static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2339{
2340        struct fib6_config cfg;
2341        int err;
2342
2343        err = rtm_to_fib6_config(skb, nlh, &cfg);
2344        if (err < 0)
2345                return err;
2346
2347        return ip6_route_add(&cfg);
2348}
2349
2350static inline size_t rt6_nlmsg_size(void)
2351{
2352        return NLMSG_ALIGN(sizeof(struct rtmsg))
2353               + nla_total_size(16) /* RTA_SRC */
2354               + nla_total_size(16) /* RTA_DST */
2355               + nla_total_size(16) /* RTA_GATEWAY */
2356               + nla_total_size(16) /* RTA_PREFSRC */
2357               + nla_total_size(4) /* RTA_TABLE */
2358               + nla_total_size(4) /* RTA_IIF */
2359               + nla_total_size(4) /* RTA_OIF */
2360               + nla_total_size(4) /* RTA_PRIORITY */
2361               + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2362               + nla_total_size(sizeof(struct rta_cacheinfo));
2363}
2364
2365static int rt6_fill_node(struct net *net,
2366                         struct sk_buff *skb, struct rt6_info *rt,
2367                         struct in6_addr *dst, struct in6_addr *src,
2368                         int iif, int type, u32 pid, u32 seq,
2369                         int prefix, int nowait, unsigned int flags)
2370{
2371        struct rtmsg *rtm;
2372        struct nlmsghdr *nlh;
2373        long expires;
2374        u32 table;
2375        struct neighbour *n;
2376
2377        if (prefix) {   /* user wants prefix routes only */
2378                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2379                        /* success since this is not a prefix route */
2380                        return 1;
2381                }
2382        }
2383
2384        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2385        if (!nlh)
2386                return -EMSGSIZE;
2387
2388        rtm = nlmsg_data(nlh);
2389        rtm->rtm_family = AF_INET6;
2390        rtm->rtm_dst_len = rt->rt6i_dst.plen;
2391        rtm->rtm_src_len = rt->rt6i_src.plen;
2392        rtm->rtm_tos = 0;
2393        if (rt->rt6i_table)
2394                table = rt->rt6i_table->tb6_id;
2395        else
2396                table = RT6_TABLE_UNSPEC;
2397        rtm->rtm_table = table;
2398        if (nla_put_u32(skb, RTA_TABLE, table))
2399                goto nla_put_failure;
2400        if (rt->rt6i_flags & RTF_REJECT)
2401                rtm->rtm_type = RTN_UNREACHABLE;
2402        else if (rt->rt6i_flags & RTF_LOCAL)
2403                rtm->rtm_type = RTN_LOCAL;
2404        else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2405                rtm->rtm_type = RTN_LOCAL;
2406        else
2407                rtm->rtm_type = RTN_UNICAST;
2408        rtm->rtm_flags = 0;
2409        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2410        rtm->rtm_protocol = rt->rt6i_protocol;
2411        if (rt->rt6i_flags & RTF_DYNAMIC)
2412                rtm->rtm_protocol = RTPROT_REDIRECT;
2413        else if (rt->rt6i_flags & RTF_ADDRCONF) {
2414                if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2415                        rtm->rtm_protocol = RTPROT_RA;
2416                else
2417                        rtm->rtm_protocol = RTPROT_KERNEL;
2418        }
2419
2420        if (rt->rt6i_flags & RTF_CACHE)
2421                rtm->rtm_flags |= RTM_F_CLONED;
2422
2423        if (dst) {
2424                if (nla_put(skb, RTA_DST, 16, dst))
2425                        goto nla_put_failure;
2426                rtm->rtm_dst_len = 128;
2427        } else if (rtm->rtm_dst_len)
2428                if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2429                        goto nla_put_failure;
2430#ifdef CONFIG_IPV6_SUBTREES
2431        if (src) {
2432                if (nla_put(skb, RTA_SRC, 16, src))
2433                        goto nla_put_failure;
2434                rtm->rtm_src_len = 128;
2435        } else if (rtm->rtm_src_len &&
2436                   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2437                goto nla_put_failure;
2438#endif
2439        if (iif) {
2440#ifdef CONFIG_IPV6_MROUTE
2441                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2442                        int err = ip6mr_get_route(net, skb, rtm, nowait);
2443                        if (err <= 0) {
2444                                if (!nowait) {
2445                                        if (err == 0)
2446                                                return 0;
2447                                        goto nla_put_failure;
2448                                } else {
2449                                        if (err == -EMSGSIZE)
2450                                                goto nla_put_failure;
2451                                }
2452                        }
2453                } else
2454#endif
2455                        if (nla_put_u32(skb, RTA_IIF, iif))
2456                                goto nla_put_failure;
2457        } else if (dst) {
2458                struct in6_addr saddr_buf;
2459                if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2460                    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2461                        goto nla_put_failure;
2462        }
2463
2464        if (rt->rt6i_prefsrc.plen) {
2465                struct in6_addr saddr_buf;
2466                saddr_buf = rt->rt6i_prefsrc.addr;
2467                if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2468                        goto nla_put_failure;
2469        }
2470
2471        if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2472                goto nla_put_failure;
2473
2474        rcu_read_lock();
2475        n = rt->n;
2476        if (n) {
2477                if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2478                        rcu_read_unlock();
2479                        goto nla_put_failure;
2480                }
2481        }
2482        rcu_read_unlock();
2483
2484        if (rt->dst.dev &&
2485            nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2486                goto nla_put_failure;
2487        if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2488                goto nla_put_failure;
2489
2490        expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2491
2492        if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2493                goto nla_put_failure;
2494
2495        return nlmsg_end(skb, nlh);
2496
2497nla_put_failure:
2498        nlmsg_cancel(skb, nlh);
2499        return -EMSGSIZE;
2500}
2501
2502int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2503{
2504        struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2505        int prefix;
2506
2507        if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2508                struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2509                prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2510        } else
2511                prefix = 0;
2512
2513        return rt6_fill_node(arg->net,
2514                     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2515                     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2516                     prefix, 0, NLM_F_MULTI);
2517}
2518
2519static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2520{
2521        struct net *net = sock_net(in_skb->sk);
2522        struct nlattr *tb[RTA_MAX+1];
2523        struct rt6_info *rt;
2524        struct sk_buff *skb;
2525        struct rtmsg *rtm;
2526        struct flowi6 fl6;
2527        int err, iif = 0, oif = 0;
2528
2529        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2530        if (err < 0)
2531                goto errout;
2532
2533        err = -EINVAL;
2534        memset(&fl6, 0, sizeof(fl6));
2535
2536        if (tb[RTA_SRC]) {
2537                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2538                        goto errout;
2539
2540                fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2541        }
2542
2543        if (tb[RTA_DST]) {
2544                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2545                        goto errout;
2546
2547                fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2548        }
2549
2550        if (tb[RTA_IIF])
2551                iif = nla_get_u32(tb[RTA_IIF]);
2552
2553        if (tb[RTA_OIF])
2554                oif = nla_get_u32(tb[RTA_OIF]);
2555
2556        if (iif) {
2557                struct net_device *dev;
2558                int flags = 0;
2559
2560                dev = __dev_get_by_index(net, iif);
2561                if (!dev) {
2562                        err = -ENODEV;
2563                        goto errout;
2564                }
2565
2566                fl6.flowi6_iif = iif;
2567
2568                if (!ipv6_addr_any(&fl6.saddr))
2569                        flags |= RT6_LOOKUP_F_HAS_SADDR;
2570
2571                rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2572                                                               flags);
2573        } else {
2574                fl6.flowi6_oif = oif;
2575
2576                rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2577        }
2578
2579        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2580        if (!skb) {
2581                dst_release(&rt->dst);
2582                err = -ENOBUFS;
2583                goto errout;
2584        }
2585
2586        /* Reserve room for dummy headers, this skb can pass
2587           through good chunk of routing engine.
2588         */
2589        skb_reset_mac_header(skb);
2590        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2591
2592        skb_dst_set(skb, &rt->dst);
2593
2594        err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2595                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2596                            nlh->nlmsg_seq, 0, 0, 0);
2597        if (err < 0) {
2598                kfree_skb(skb);
2599                goto errout;
2600        }
2601
2602        err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2603errout:
2604        return err;
2605}
2606
2607void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2608{
2609        struct sk_buff *skb;
2610        struct net *net = info->nl_net;
2611        u32 seq;
2612        int err;
2613
2614        err = -ENOBUFS;
2615        seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2616
2617        skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2618        if (!skb)
2619                goto errout;
2620
2621        err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2622                                event, info->pid, seq, 0, 0, 0);
2623        if (err < 0) {
2624                /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2625                WARN_ON(err == -EMSGSIZE);
2626                kfree_skb(skb);
2627                goto errout;
2628        }
2629        rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2630                    info->nlh, gfp_any());
2631        return;
2632errout:
2633        if (err < 0)
2634                rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2635}
2636
2637static int ip6_route_dev_notify(struct notifier_block *this,
2638                                unsigned long event, void *data)
2639{
2640        struct net_device *dev = (struct net_device *)data;
2641        struct net *net = dev_net(dev);
2642
2643        if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2644                net->ipv6.ip6_null_entry->dst.dev = dev;
2645                net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2646#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2647                net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2648                net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2649                net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2650                net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2651#endif
2652        }
2653
2654        return NOTIFY_OK;
2655}
2656
2657/*
2658 *      /proc
2659 */
2660
2661#ifdef CONFIG_PROC_FS
2662
/*
 * Buffer-cursor bookkeeping record for /proc output.
 * NOTE(review): nothing in the visible part of this file references
 * this struct; the field meanings below are inferred from their names
 * only - confirm before relying on them.
 */
struct rt6_proc_arg {
	char *buffer;	/* presumably the destination buffer */
	int offset;	/* presumably the requested start offset */
	int length;	/* presumably the requested length */
	int skip;	/* presumably bytes skipped so far */
	int len;	/* presumably bytes produced so far */
};
2671
2672static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2673{
2674        struct seq_file *m = p_arg;
2675        struct neighbour *n;
2676
2677        seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2678
2679#ifdef CONFIG_IPV6_SUBTREES
2680        seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2681#else
2682        seq_puts(m, "00000000000000000000000000000000 00 ");
2683#endif
2684        rcu_read_lock();
2685        n = rt->n;
2686        if (n) {
2687                seq_printf(m, "%pi6", n->primary_key);
2688        } else {
2689                seq_puts(m, "00000000000000000000000000000000");
2690        }
2691        rcu_read_unlock();
2692        seq_printf(m, " %08x %08x %08x %08x %8s\n",
2693                   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2694                   rt->dst.__use, rt->rt6i_flags,
2695                   rt->dst.dev ? rt->dst.dev->name : "");
2696        return 0;
2697}
2698
2699static int ipv6_route_show(struct seq_file *m, void *v)
2700{
2701        struct net *net = (struct net *)m->private;
2702        fib6_clean_all_ro(net, rt6_info_route, 0, m);
2703        return 0;
2704}
2705
2706static int ipv6_route_open(struct inode *inode, struct file *file)
2707{
2708        return single_open_net(inode, file, ipv6_route_show);
2709}
2710
2711static const struct file_operations ipv6_route_proc_fops = {
2712        .owner          = THIS_MODULE,
2713        .open           = ipv6_route_open,
2714        .read           = seq_read,
2715        .llseek         = seq_lseek,
2716        .release        = single_release_net,
2717};
2718
2719static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2720{
2721        struct net *net = (struct net *)seq->private;
2722        seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2723                   net->ipv6.rt6_stats->fib_nodes,
2724                   net->ipv6.rt6_stats->fib_route_nodes,
2725                   net->ipv6.rt6_stats->fib_rt_alloc,
2726                   net->ipv6.rt6_stats->fib_rt_entries,
2727                   net->ipv6.rt6_stats->fib_rt_cache,
2728                   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2729                   net->ipv6.rt6_stats->fib_discarded_routes);
2730
2731        return 0;
2732}
2733
2734static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2735{
2736        return single_open_net(inode, file, rt6_stats_seq_show);
2737}
2738
2739static const struct file_operations rt6_stats_seq_fops = {
2740        .owner   = THIS_MODULE,
2741        .open    = rt6_stats_seq_open,
2742        .read    = seq_read,
2743        .llseek  = seq_lseek,
2744        .release = single_release_net,
2745};
2746#endif  /* CONFIG_PROC_FS */
2747
2748#ifdef CONFIG_SYSCTL
2749
2750static
2751int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2752                              void __user *buffer, size_t *lenp, loff_t *ppos)
2753{
2754        struct net *net;
2755        int delay;
2756        if (!write)
2757                return -EINVAL;
2758
2759        net = (struct net *)ctl->extra1;
2760        delay = net->ipv6.sysctl.flush_delay;
2761        proc_dointvec(ctl, write, buffer, lenp, ppos);
2762        fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2763        return 0;
2764}
2765
2766ctl_table ipv6_route_table_template[] = {
2767        {
2768                .procname       =       "flush",
2769                .data           =       &init_net.ipv6.sysctl.flush_delay,
2770                .maxlen         =       sizeof(int),
2771                .mode           =       0200,
2772                .proc_handler   =       ipv6_sysctl_rtcache_flush
2773        },
2774        {
2775                .procname       =       "gc_thresh",
2776                .data           =       &ip6_dst_ops_template.gc_thresh,
2777                .maxlen         =       sizeof(int),
2778                .mode           =       0644,
2779                .proc_handler   =       proc_dointvec,
2780        },
2781        {
2782                .procname       =       "max_size",
2783                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2784                .maxlen         =       sizeof(int),
2785                .mode           =       0644,
2786                .proc_handler   =       proc_dointvec,
2787        },
2788        {
2789                .procname       =       "gc_min_interval",
2790                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2791                .maxlen         =       sizeof(int),
2792                .mode           =       0644,
2793                .proc_handler   =       proc_dointvec_jiffies,
2794        },
2795        {
2796                .procname       =       "gc_timeout",
2797                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2798                .maxlen         =       sizeof(int),
2799                .mode           =       0644,
2800                .proc_handler   =       proc_dointvec_jiffies,
2801        },
2802        {
2803                .procname       =       "gc_interval",
2804                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2805                .maxlen         =       sizeof(int),
2806                .mode           =       0644,
2807                .proc_handler   =       proc_dointvec_jiffies,
2808        },
2809        {
2810                .procname       =       "gc_elasticity",
2811                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2812                .maxlen         =       sizeof(int),
2813                .mode           =       0644,
2814                .proc_handler   =       proc_dointvec,
2815        },
2816        {
2817                .procname       =       "mtu_expires",
2818                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2819                .maxlen         =       sizeof(int),
2820                .mode           =       0644,
2821                .proc_handler   =       proc_dointvec_jiffies,
2822        },
2823        {
2824                .procname       =       "min_adv_mss",
2825                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2826                .maxlen         =       sizeof(int),
2827                .mode           =       0644,
2828                .proc_handler   =       proc_dointvec,
2829        },
2830        {
2831                .procname       =       "gc_min_interval_ms",
2832                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2833                .maxlen         =       sizeof(int),
2834                .mode           =       0644,
2835                .proc_handler   =       proc_dointvec_ms_jiffies,
2836        },
2837        { }
2838};
2839
2840struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2841{
2842        struct ctl_table *table;
2843
2844        table = kmemdup(ipv6_route_table_template,
2845                        sizeof(ipv6_route_table_template),
2846                        GFP_KERNEL);
2847
2848        if (table) {
2849                table[0].data = &net->ipv6.sysctl.flush_delay;
2850                table[0].extra1 = net;
2851                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2852                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2853                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2854                table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2855                table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2856                table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2857                table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2858                table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2859                table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2860        }
2861
2862        return table;
2863}
2864#endif
2865
2866static int __net_init ip6_route_net_init(struct net *net)
2867{
2868        int ret = -ENOMEM;
2869
2870        memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2871               sizeof(net->ipv6.ip6_dst_ops));
2872
2873        if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2874                goto out_ip6_dst_ops;
2875
2876        net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2877                                           sizeof(*net->ipv6.ip6_null_entry),
2878                                           GFP_KERNEL);
2879        if (!net->ipv6.ip6_null_entry)
2880                goto out_ip6_dst_entries;
2881        net->ipv6.ip6_null_entry->dst.path =
2882                (struct dst_entry *)net->ipv6.ip6_null_entry;
2883        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2884        dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2885                         ip6_template_metrics, true);
2886
2887#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2888        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2889                                               sizeof(*net->ipv6.ip6_prohibit_entry),
2890                                               GFP_KERNEL);
2891        if (!net->ipv6.ip6_prohibit_entry)
2892                goto out_ip6_null_entry;
2893        net->ipv6.ip6_prohibit_entry->dst.path =
2894                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2895        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2896        dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2897                         ip6_template_metrics, true);
2898
2899        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2900                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
2901                                               GFP_KERNEL);
2902        if (!net->ipv6.ip6_blk_hole_entry)
2903                goto out_ip6_prohibit_entry;
2904        net->ipv6.ip6_blk_hole_entry->dst.path =
2905                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2906        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2907        dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2908                         ip6_template_metrics, true);
2909#endif
2910
2911        net->ipv6.sysctl.flush_delay = 0;
2912        net->ipv6.sysctl.ip6_rt_max_size = 4096;
2913        net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2914        net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2915        net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2916        net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2917        net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2918        net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2919
2920        net->ipv6.ip6_rt_gc_expire = 30*HZ;
2921
2922        ret = 0;
2923out:
2924        return ret;
2925
2926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2927out_ip6_prohibit_entry:
2928        kfree(net->ipv6.ip6_prohibit_entry);
2929out_ip6_null_entry:
2930        kfree(net->ipv6.ip6_null_entry);
2931#endif
2932out_ip6_dst_entries:
2933        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2934out_ip6_dst_ops:
2935        goto out;
2936}
2937
2938static void __net_exit ip6_route_net_exit(struct net *net)
2939{
2940        kfree(net->ipv6.ip6_null_entry);
2941#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2942        kfree(net->ipv6.ip6_prohibit_entry);
2943        kfree(net->ipv6.ip6_blk_hole_entry);
2944#endif
2945        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2946}
2947
2948static int __net_init ip6_route_net_init_late(struct net *net)
2949{
2950#ifdef CONFIG_PROC_FS
2951        proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2952        proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2953#endif
2954        return 0;
2955}
2956
2957static void __net_exit ip6_route_net_exit_late(struct net *net)
2958{
2959#ifdef CONFIG_PROC_FS
2960        proc_net_remove(net, "ipv6_route");
2961        proc_net_remove(net, "rt6_stats");
2962#endif
2963}
2964
2965static struct pernet_operations ip6_route_net_ops = {
2966        .init = ip6_route_net_init,
2967        .exit = ip6_route_net_exit,
2968};
2969
2970static int __net_init ipv6_inetpeer_init(struct net *net)
2971{
2972        struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2973
2974        if (!bp)
2975                return -ENOMEM;
2976        inet_peer_base_init(bp);
2977        net->ipv6.peers = bp;
2978        return 0;
2979}
2980
2981static void __net_exit ipv6_inetpeer_exit(struct net *net)
2982{
2983        struct inet_peer_base *bp = net->ipv6.peers;
2984
2985        net->ipv6.peers = NULL;
2986        inetpeer_invalidate_tree(bp);
2987        kfree(bp);
2988}
2989
2990static struct pernet_operations ipv6_inetpeer_ops = {
2991        .init   =       ipv6_inetpeer_init,
2992        .exit   =       ipv6_inetpeer_exit,
2993};
2994
2995static struct pernet_operations ip6_route_net_late_ops = {
2996        .init = ip6_route_net_init_late,
2997        .exit = ip6_route_net_exit_late,
2998};
2999
3000static struct notifier_block ip6_route_dev_notifier = {
3001        .notifier_call = ip6_route_dev_notify,
3002        .priority = 0,
3003};
3004
3005int __init ip6_route_init(void)
3006{
3007        int ret;
3008
3009        ret = -ENOMEM;
3010        ip6_dst_ops_template.kmem_cachep =
3011                kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3012                                  SLAB_HWCACHE_ALIGN, NULL);
3013        if (!ip6_dst_ops_template.kmem_cachep)
3014                goto out;
3015
3016        ret = dst_entries_init(&ip6_dst_blackhole_ops);
3017        if (ret)
3018                goto out_kmem_cache;
3019
3020        ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3021        if (ret)
3022                goto out_dst_entries;
3023
3024        ret = register_pernet_subsys(&ip6_route_net_ops);
3025        if (ret)
3026                goto out_register_inetpeer;
3027
3028        ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3029
3030        /* Registering of the loopback is done before this portion of code,
3031         * the loopback reference in rt6_info will not be taken, do it
3032         * manually for init_net */
3033        init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3034        init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3035  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3036        init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3037        init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3038        init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3039        init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3040  #endif
3041        ret = fib6_init();
3042        if (ret)
3043                goto out_register_subsys;
3044
3045        ret = xfrm6_init();
3046        if (ret)
3047                goto out_fib6_init;
3048
3049        ret = fib6_rules_init();
3050        if (ret)
3051                goto xfrm6_init;
3052
3053        ret = register_pernet_subsys(&ip6_route_net_late_ops);
3054        if (ret)
3055                goto fib6_rules_init;
3056
3057        ret = -ENOBUFS;
3058        if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3059            __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3060            __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3061                goto out_register_late_subsys;
3062
3063        ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3064        if (ret)
3065                goto out_register_late_subsys;
3066
3067out:
3068        return ret;
3069
3070out_register_late_subsys:
3071        unregister_pernet_subsys(&ip6_route_net_late_ops);
3072fib6_rules_init:
3073        fib6_rules_cleanup();
3074xfrm6_init:
3075        xfrm6_fini();
3076out_fib6_init:
3077        fib6_gc_cleanup();
3078out_register_subsys:
3079        unregister_pernet_subsys(&ip6_route_net_ops);
3080out_register_inetpeer:
3081        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3082out_dst_entries:
3083        dst_entries_destroy(&ip6_dst_blackhole_ops);
3084out_kmem_cache:
3085        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3086        goto out;
3087}
3088
3089void ip6_route_cleanup(void)
3090{
3091        unregister_netdevice_notifier(&ip6_route_dev_notifier);
3092        unregister_pernet_subsys(&ip6_route_net_late_ops);
3093        fib6_rules_cleanup();
3094        xfrm6_fini();
3095        fib6_gc_cleanup();
3096        unregister_pernet_subsys(&ipv6_inetpeer_ops);
3097        unregister_pernet_subsys(&ip6_route_net_ops);
3098        dst_entries_destroy(&ip6_dst_blackhole_ops);
3099        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3100}
3101
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.