linux/net/ipv6/ip6mr.c
<<
>>
Prefs
   1/*
   2 *      Linux IPv6 multicast routing support for BSD pim6sd
   3 *      Based on net/ipv4/ipmr.c.
   4 *
   5 *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
   6 *              LSIIT Laboratory, Strasbourg, France
   7 *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
   8 *              6WIND, Paris, France
   9 *      Copyright (C)2007,2008 USAGI/WIDE Project
  10 *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
  11 *
  12 *      This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 */
  18
  19#include <asm/uaccess.h>
  20#include <linux/types.h>
  21#include <linux/sched.h>
  22#include <linux/errno.h>
  23#include <linux/timer.h>
  24#include <linux/mm.h>
  25#include <linux/kernel.h>
  26#include <linux/fcntl.h>
  27#include <linux/stat.h>
  28#include <linux/socket.h>
  29#include <linux/inet.h>
  30#include <linux/netdevice.h>
  31#include <linux/inetdevice.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/init.h>
  35#include <linux/slab.h>
  36#include <linux/compat.h>
  37#include <net/protocol.h>
  38#include <linux/skbuff.h>
  39#include <net/sock.h>
  40#include <net/raw.h>
  41#include <linux/notifier.h>
  42#include <linux/if_arp.h>
  43#include <net/checksum.h>
  44#include <net/netlink.h>
  45#include <net/fib_rules.h>
  46
  47#include <net/ipv6.h>
  48#include <net/ip6_route.h>
  49#include <linux/mroute6.h>
  50#include <linux/pim.h>
  51#include <net/addrconf.h>
  52#include <linux/netfilter_ipv6.h>
  53#include <linux/export.h>
  54#include <net/ip6_checksum.h>
  55
  56struct mr6_table {
  57        struct list_head        list;
  58#ifdef CONFIG_NET_NS
  59        struct net              *net;
  60#endif
  61        u32                     id;
  62        struct sock             *mroute6_sk;
  63        struct timer_list       ipmr_expire_timer;
  64        struct list_head        mfc6_unres_queue;
  65        struct list_head        mfc6_cache_array[MFC6_LINES];
  66        struct mif_device       vif6_table[MAXMIFS];
  67        int                     maxvif;
  68        atomic_t                cache_resolve_queue_len;
  69        int                     mroute_do_assert;
  70        int                     mroute_do_pim;
  71#ifdef CONFIG_IPV6_PIMSM_V2
  72        int                     mroute_reg_vif_num;
  73#endif
  74};
  75
  76struct ip6mr_rule {
  77        struct fib_rule         common;
  78};
  79
  80struct ip6mr_result {
  81        struct mr6_table        *mrt;
  82};
  83
  84/* Big lock, protecting vif table, mrt cache and mroute socket state.
  85   Note that the changes are semaphored via rtnl_lock.
  86 */
  87
  88static DEFINE_RWLOCK(mrt_lock);
  89
  90/*
  91 *      Multicast router control variables
  92 */
  93
  94#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
  95
  96/* Special spinlock for queue of unresolved entries */
  97static DEFINE_SPINLOCK(mfc_unres_lock);
  98
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   With this scheme the data path takes no exclusive locks at all.
 */
 106
 107static struct kmem_cache *mrt_cachep __read_mostly;
 108
 109static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
 110static void ip6mr_free_table(struct mr6_table *mrt);
 111
 112static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 113                          struct sk_buff *skb, struct mfc6_cache *cache);
 114static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 115                              mifi_t mifi, int assert);
 116static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 117                               struct mfc6_cache *c, struct rtmsg *rtm);
 118static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 119                               struct netlink_callback *cb);
 120static void mroute_clean_tables(struct mr6_table *mrt);
 121static void ipmr_expire_process(unsigned long arg);
 122
 123#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 124#define ip6mr_for_each_table(mrt, net) \
 125        list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 126
 127static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 128{
 129        struct mr6_table *mrt;
 130
 131        ip6mr_for_each_table(mrt, net) {
 132                if (mrt->id == id)
 133                        return mrt;
 134        }
 135        return NULL;
 136}
 137
 138static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 139                            struct mr6_table **mrt)
 140{
 141        struct ip6mr_result res;
 142        struct fib_lookup_arg arg = { .result = &res, };
 143        int err;
 144
 145        err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
 146                               flowi6_to_flowi(flp6), 0, &arg);
 147        if (err < 0)
 148                return err;
 149        *mrt = res.mrt;
 150        return 0;
 151}
 152
 153static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
 154                             int flags, struct fib_lookup_arg *arg)
 155{
 156        struct ip6mr_result *res = arg->result;
 157        struct mr6_table *mrt;
 158
 159        switch (rule->action) {
 160        case FR_ACT_TO_TBL:
 161                break;
 162        case FR_ACT_UNREACHABLE:
 163                return -ENETUNREACH;
 164        case FR_ACT_PROHIBIT:
 165                return -EACCES;
 166        case FR_ACT_BLACKHOLE:
 167        default:
 168                return -EINVAL;
 169        }
 170
 171        mrt = ip6mr_get_table(rule->fr_net, rule->table);
 172        if (mrt == NULL)
 173                return -EAGAIN;
 174        res->mrt = mrt;
 175        return 0;
 176}
 177
 178static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
 179{
 180        return 1;
 181}
 182
 183static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 184        FRA_GENERIC_POLICY,
 185};
 186
 187static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 188                                struct fib_rule_hdr *frh, struct nlattr **tb)
 189{
 190        return 0;
 191}
 192
 193static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 194                              struct nlattr **tb)
 195{
 196        return 1;
 197}
 198
 199static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 200                           struct fib_rule_hdr *frh)
 201{
 202        frh->dst_len = 0;
 203        frh->src_len = 0;
 204        frh->tos     = 0;
 205        return 0;
 206}
 207
 208static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
 209        .family         = RTNL_FAMILY_IP6MR,
 210        .rule_size      = sizeof(struct ip6mr_rule),
 211        .addr_size      = sizeof(struct in6_addr),
 212        .action         = ip6mr_rule_action,
 213        .match          = ip6mr_rule_match,
 214        .configure      = ip6mr_rule_configure,
 215        .compare        = ip6mr_rule_compare,
 216        .default_pref   = fib_default_rule_pref,
 217        .fill           = ip6mr_rule_fill,
 218        .nlgroup        = RTNLGRP_IPV6_RULE,
 219        .policy         = ip6mr_rule_policy,
 220        .owner          = THIS_MODULE,
 221};
 222
 223static int __net_init ip6mr_rules_init(struct net *net)
 224{
 225        struct fib_rules_ops *ops;
 226        struct mr6_table *mrt;
 227        int err;
 228
 229        ops = fib_rules_register(&ip6mr_rules_ops_template, net);
 230        if (IS_ERR(ops))
 231                return PTR_ERR(ops);
 232
 233        INIT_LIST_HEAD(&net->ipv6.mr6_tables);
 234
 235        mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
 236        if (mrt == NULL) {
 237                err = -ENOMEM;
 238                goto err1;
 239        }
 240
 241        err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
 242        if (err < 0)
 243                goto err2;
 244
 245        net->ipv6.mr6_rules_ops = ops;
 246        return 0;
 247
 248err2:
 249        kfree(mrt);
 250err1:
 251        fib_rules_unregister(ops);
 252        return err;
 253}
 254
 255static void __net_exit ip6mr_rules_exit(struct net *net)
 256{
 257        struct mr6_table *mrt, *next;
 258
 259        list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 260                list_del(&mrt->list);
 261                ip6mr_free_table(mrt);
 262        }
 263        fib_rules_unregister(net->ipv6.mr6_rules_ops);
 264}
 265#else
 266#define ip6mr_for_each_table(mrt, net) \
 267        for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
 268
 269static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
 270{
 271        return net->ipv6.mrt6;
 272}
 273
 274static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
 275                            struct mr6_table **mrt)
 276{
 277        *mrt = net->ipv6.mrt6;
 278        return 0;
 279}
 280
 281static int __net_init ip6mr_rules_init(struct net *net)
 282{
 283        net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
 284        return net->ipv6.mrt6 ? 0 : -ENOMEM;
 285}
 286
 287static void __net_exit ip6mr_rules_exit(struct net *net)
 288{
 289        ip6mr_free_table(net->ipv6.mrt6);
 290}
 291#endif
 292
 293static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 294{
 295        struct mr6_table *mrt;
 296        unsigned int i;
 297
 298        mrt = ip6mr_get_table(net, id);
 299        if (mrt != NULL)
 300                return mrt;
 301
 302        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 303        if (mrt == NULL)
 304                return NULL;
 305        mrt->id = id;
 306        write_pnet(&mrt->net, net);
 307
 308        /* Forwarding cache */
 309        for (i = 0; i < MFC6_LINES; i++)
 310                INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 311
 312        INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 313
 314        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 315                    (unsigned long)mrt);
 316
 317#ifdef CONFIG_IPV6_PIMSM_V2
 318        mrt->mroute_reg_vif_num = -1;
 319#endif
 320#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 321        list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
 322#endif
 323        return mrt;
 324}
 325
 326static void ip6mr_free_table(struct mr6_table *mrt)
 327{
 328        del_timer(&mrt->ipmr_expire_timer);
 329        mroute_clean_tables(mrt);
 330        kfree(mrt);
 331}
 332
 333#ifdef CONFIG_PROC_FS
 334
 335struct ipmr_mfc_iter {
 336        struct seq_net_private p;
 337        struct mr6_table *mrt;
 338        struct list_head *cache;
 339        int ct;
 340};
 341
 342
 343static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 344                                           struct ipmr_mfc_iter *it, loff_t pos)
 345{
 346        struct mr6_table *mrt = it->mrt;
 347        struct mfc6_cache *mfc;
 348
 349        read_lock(&mrt_lock);
 350        for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
 351                it->cache = &mrt->mfc6_cache_array[it->ct];
 352                list_for_each_entry(mfc, it->cache, list)
 353                        if (pos-- == 0)
 354                                return mfc;
 355        }
 356        read_unlock(&mrt_lock);
 357
 358        spin_lock_bh(&mfc_unres_lock);
 359        it->cache = &mrt->mfc6_unres_queue;
 360        list_for_each_entry(mfc, it->cache, list)
 361                if (pos-- == 0)
 362                        return mfc;
 363        spin_unlock_bh(&mfc_unres_lock);
 364
 365        it->cache = NULL;
 366        return NULL;
 367}
 368
 369/*
 370 *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
 371 */
 372
 373struct ipmr_vif_iter {
 374        struct seq_net_private p;
 375        struct mr6_table *mrt;
 376        int ct;
 377};
 378
 379static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 380                                            struct ipmr_vif_iter *iter,
 381                                            loff_t pos)
 382{
 383        struct mr6_table *mrt = iter->mrt;
 384
 385        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 386                if (!MIF_EXISTS(mrt, iter->ct))
 387                        continue;
 388                if (pos-- == 0)
 389                        return &mrt->vif6_table[iter->ct];
 390        }
 391        return NULL;
 392}
 393
 394static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 395        __acquires(mrt_lock)
 396{
 397        struct ipmr_vif_iter *iter = seq->private;
 398        struct net *net = seq_file_net(seq);
 399        struct mr6_table *mrt;
 400
 401        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 402        if (mrt == NULL)
 403                return ERR_PTR(-ENOENT);
 404
 405        iter->mrt = mrt;
 406
 407        read_lock(&mrt_lock);
 408        return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
 409                : SEQ_START_TOKEN;
 410}
 411
 412static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 413{
 414        struct ipmr_vif_iter *iter = seq->private;
 415        struct net *net = seq_file_net(seq);
 416        struct mr6_table *mrt = iter->mrt;
 417
 418        ++*pos;
 419        if (v == SEQ_START_TOKEN)
 420                return ip6mr_vif_seq_idx(net, iter, 0);
 421
 422        while (++iter->ct < mrt->maxvif) {
 423                if (!MIF_EXISTS(mrt, iter->ct))
 424                        continue;
 425                return &mrt->vif6_table[iter->ct];
 426        }
 427        return NULL;
 428}
 429
 430static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 431        __releases(mrt_lock)
 432{
 433        read_unlock(&mrt_lock);
 434}
 435
 436static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 437{
 438        struct ipmr_vif_iter *iter = seq->private;
 439        struct mr6_table *mrt = iter->mrt;
 440
 441        if (v == SEQ_START_TOKEN) {
 442                seq_puts(seq,
 443                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
 444        } else {
 445                const struct mif_device *vif = v;
 446                const char *name = vif->dev ? vif->dev->name : "none";
 447
 448                seq_printf(seq,
 449                           "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
 450                           vif - mrt->vif6_table,
 451                           name, vif->bytes_in, vif->pkt_in,
 452                           vif->bytes_out, vif->pkt_out,
 453                           vif->flags);
 454        }
 455        return 0;
 456}
 457
 458static const struct seq_operations ip6mr_vif_seq_ops = {
 459        .start = ip6mr_vif_seq_start,
 460        .next  = ip6mr_vif_seq_next,
 461        .stop  = ip6mr_vif_seq_stop,
 462        .show  = ip6mr_vif_seq_show,
 463};
 464
 465static int ip6mr_vif_open(struct inode *inode, struct file *file)
 466{
 467        return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
 468                            sizeof(struct ipmr_vif_iter));
 469}
 470
 471static const struct file_operations ip6mr_vif_fops = {
 472        .owner   = THIS_MODULE,
 473        .open    = ip6mr_vif_open,
 474        .read    = seq_read,
 475        .llseek  = seq_lseek,
 476        .release = seq_release_net,
 477};
 478
 479static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 480{
 481        struct ipmr_mfc_iter *it = seq->private;
 482        struct net *net = seq_file_net(seq);
 483        struct mr6_table *mrt;
 484
 485        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
 486        if (mrt == NULL)
 487                return ERR_PTR(-ENOENT);
 488
 489        it->mrt = mrt;
 490        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 491                : SEQ_START_TOKEN;
 492}
 493
 494static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 495{
 496        struct mfc6_cache *mfc = v;
 497        struct ipmr_mfc_iter *it = seq->private;
 498        struct net *net = seq_file_net(seq);
 499        struct mr6_table *mrt = it->mrt;
 500
 501        ++*pos;
 502
 503        if (v == SEQ_START_TOKEN)
 504                return ipmr_mfc_seq_idx(net, seq->private, 0);
 505
 506        if (mfc->list.next != it->cache)
 507                return list_entry(mfc->list.next, struct mfc6_cache, list);
 508
 509        if (it->cache == &mrt->mfc6_unres_queue)
 510                goto end_of_list;
 511
 512        BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 513
 514        while (++it->ct < MFC6_LINES) {
 515                it->cache = &mrt->mfc6_cache_array[it->ct];
 516                if (list_empty(it->cache))
 517                        continue;
 518                return list_first_entry(it->cache, struct mfc6_cache, list);
 519        }
 520
 521        /* exhausted cache_array, show unresolved */
 522        read_unlock(&mrt_lock);
 523        it->cache = &mrt->mfc6_unres_queue;
 524        it->ct = 0;
 525
 526        spin_lock_bh(&mfc_unres_lock);
 527        if (!list_empty(it->cache))
 528                return list_first_entry(it->cache, struct mfc6_cache, list);
 529
 530 end_of_list:
 531        spin_unlock_bh(&mfc_unres_lock);
 532        it->cache = NULL;
 533
 534        return NULL;
 535}
 536
 537static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 538{
 539        struct ipmr_mfc_iter *it = seq->private;
 540        struct mr6_table *mrt = it->mrt;
 541
 542        if (it->cache == &mrt->mfc6_unres_queue)
 543                spin_unlock_bh(&mfc_unres_lock);
 544        else if (it->cache == mrt->mfc6_cache_array)
 545                read_unlock(&mrt_lock);
 546}
 547
 548static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 549{
 550        int n;
 551
 552        if (v == SEQ_START_TOKEN) {
 553                seq_puts(seq,
 554                         "Group                            "
 555                         "Origin                           "
 556                         "Iif      Pkts  Bytes     Wrong  Oifs\n");
 557        } else {
 558                const struct mfc6_cache *mfc = v;
 559                const struct ipmr_mfc_iter *it = seq->private;
 560                struct mr6_table *mrt = it->mrt;
 561
 562                seq_printf(seq, "%pI6 %pI6 %-3hd",
 563                           &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 564                           mfc->mf6c_parent);
 565
 566                if (it->cache != &mrt->mfc6_unres_queue) {
 567                        seq_printf(seq, " %8lu %8lu %8lu",
 568                                   mfc->mfc_un.res.pkt,
 569                                   mfc->mfc_un.res.bytes,
 570                                   mfc->mfc_un.res.wrong_if);
 571                        for (n = mfc->mfc_un.res.minvif;
 572                             n < mfc->mfc_un.res.maxvif; n++) {
 573                                if (MIF_EXISTS(mrt, n) &&
 574                                    mfc->mfc_un.res.ttls[n] < 255)
 575                                        seq_printf(seq,
 576                                                   " %2d:%-3d",
 577                                                   n, mfc->mfc_un.res.ttls[n]);
 578                        }
 579                } else {
 580                        /* unresolved mfc_caches don't contain
 581                         * pkt, bytes and wrong_if values
 582                         */
 583                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
 584                }
 585                seq_putc(seq, '\n');
 586        }
 587        return 0;
 588}
 589
 590static const struct seq_operations ipmr_mfc_seq_ops = {
 591        .start = ipmr_mfc_seq_start,
 592        .next  = ipmr_mfc_seq_next,
 593        .stop  = ipmr_mfc_seq_stop,
 594        .show  = ipmr_mfc_seq_show,
 595};
 596
 597static int ipmr_mfc_open(struct inode *inode, struct file *file)
 598{
 599        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
 600                            sizeof(struct ipmr_mfc_iter));
 601}
 602
 603static const struct file_operations ip6mr_mfc_fops = {
 604        .owner   = THIS_MODULE,
 605        .open    = ipmr_mfc_open,
 606        .read    = seq_read,
 607        .llseek  = seq_lseek,
 608        .release = seq_release_net,
 609};
 610#endif
 611
 612#ifdef CONFIG_IPV6_PIMSM_V2
 613
 614static int pim6_rcv(struct sk_buff *skb)
 615{
 616        struct pimreghdr *pim;
 617        struct ipv6hdr   *encap;
 618        struct net_device  *reg_dev = NULL;
 619        struct net *net = dev_net(skb->dev);
 620        struct mr6_table *mrt;
 621        struct flowi6 fl6 = {
 622                .flowi6_iif     = skb->dev->ifindex,
 623                .flowi6_mark    = skb->mark,
 624        };
 625        int reg_vif_num;
 626
 627        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 628                goto drop;
 629
 630        pim = (struct pimreghdr *)skb_transport_header(skb);
 631        if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 632            (pim->flags & PIM_NULL_REGISTER) ||
 633            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 634                             sizeof(*pim), IPPROTO_PIM,
 635                             csum_partial((void *)pim, sizeof(*pim), 0)) &&
 636             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 637                goto drop;
 638
 639        /* check if the inner packet is destined to mcast group */
 640        encap = (struct ipv6hdr *)(skb_transport_header(skb) +
 641                                   sizeof(*pim));
 642
 643        if (!ipv6_addr_is_multicast(&encap->daddr) ||
 644            encap->payload_len == 0 ||
 645            ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 646                goto drop;
 647
 648        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
 649                goto drop;
 650        reg_vif_num = mrt->mroute_reg_vif_num;
 651
 652        read_lock(&mrt_lock);
 653        if (reg_vif_num >= 0)
 654                reg_dev = mrt->vif6_table[reg_vif_num].dev;
 655        if (reg_dev)
 656                dev_hold(reg_dev);
 657        read_unlock(&mrt_lock);
 658
 659        if (reg_dev == NULL)
 660                goto drop;
 661
 662        skb->mac_header = skb->network_header;
 663        skb_pull(skb, (u8 *)encap - skb->data);
 664        skb_reset_network_header(skb);
 665        skb->protocol = htons(ETH_P_IPV6);
 666        skb->ip_summed = CHECKSUM_NONE;
 667        skb->pkt_type = PACKET_HOST;
 668
 669        skb_tunnel_rx(skb, reg_dev);
 670
 671        netif_rx(skb);
 672
 673        dev_put(reg_dev);
 674        return 0;
 675 drop:
 676        kfree_skb(skb);
 677        return 0;
 678}
 679
 680static const struct inet6_protocol pim6_protocol = {
 681        .handler        =       pim6_rcv,
 682};
 683
 684/* Service routines creating virtual interfaces: PIMREG */
 685
 686static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 687                                      struct net_device *dev)
 688{
 689        struct net *net = dev_net(dev);
 690        struct mr6_table *mrt;
 691        struct flowi6 fl6 = {
 692                .flowi6_oif     = dev->ifindex,
 693                .flowi6_iif     = skb->skb_iif,
 694                .flowi6_mark    = skb->mark,
 695        };
 696        int err;
 697
 698        err = ip6mr_fib_lookup(net, &fl6, &mrt);
 699        if (err < 0) {
 700                kfree_skb(skb);
 701                return err;
 702        }
 703
 704        read_lock(&mrt_lock);
 705        dev->stats.tx_bytes += skb->len;
 706        dev->stats.tx_packets++;
 707        ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 708        read_unlock(&mrt_lock);
 709        kfree_skb(skb);
 710        return NETDEV_TX_OK;
 711}
 712
 713static const struct net_device_ops reg_vif_netdev_ops = {
 714        .ndo_start_xmit = reg_vif_xmit,
 715};
 716
 717static void reg_vif_setup(struct net_device *dev)
 718{
 719        dev->type               = ARPHRD_PIMREG;
 720        dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
 721        dev->flags              = IFF_NOARP;
 722        dev->netdev_ops         = &reg_vif_netdev_ops;
 723        dev->destructor         = free_netdev;
 724        dev->features           |= NETIF_F_NETNS_LOCAL;
 725}
 726
 727static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 728{
 729        struct net_device *dev;
 730        char name[IFNAMSIZ];
 731
 732        if (mrt->id == RT6_TABLE_DFLT)
 733                sprintf(name, "pim6reg");
 734        else
 735                sprintf(name, "pim6reg%u", mrt->id);
 736
 737        dev = alloc_netdev(0, name, reg_vif_setup);
 738        if (dev == NULL)
 739                return NULL;
 740
 741        dev_net_set(dev, net);
 742
 743        if (register_netdevice(dev)) {
 744                free_netdev(dev);
 745                return NULL;
 746        }
 747        dev->iflink = 0;
 748
 749        if (dev_open(dev))
 750                goto failure;
 751
 752        dev_hold(dev);
 753        return dev;
 754
 755failure:
 756        /* allow the register to be completed before unregistering. */
 757        rtnl_unlock();
 758        rtnl_lock();
 759
 760        unregister_netdevice(dev);
 761        return NULL;
 762}
 763#endif
 764
 765/*
 766 *      Delete a VIF entry
 767 */
 768
 769static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 770{
 771        struct mif_device *v;
 772        struct net_device *dev;
 773        struct inet6_dev *in6_dev;
 774
 775        if (vifi < 0 || vifi >= mrt->maxvif)
 776                return -EADDRNOTAVAIL;
 777
 778        v = &mrt->vif6_table[vifi];
 779
 780        write_lock_bh(&mrt_lock);
 781        dev = v->dev;
 782        v->dev = NULL;
 783
 784        if (!dev) {
 785                write_unlock_bh(&mrt_lock);
 786                return -EADDRNOTAVAIL;
 787        }
 788
 789#ifdef CONFIG_IPV6_PIMSM_V2
 790        if (vifi == mrt->mroute_reg_vif_num)
 791                mrt->mroute_reg_vif_num = -1;
 792#endif
 793
 794        if (vifi + 1 == mrt->maxvif) {
 795                int tmp;
 796                for (tmp = vifi - 1; tmp >= 0; tmp--) {
 797                        if (MIF_EXISTS(mrt, tmp))
 798                                break;
 799                }
 800                mrt->maxvif = tmp + 1;
 801        }
 802
 803        write_unlock_bh(&mrt_lock);
 804
 805        dev_set_allmulti(dev, -1);
 806
 807        in6_dev = __in6_dev_get(dev);
 808        if (in6_dev)
 809                in6_dev->cnf.mc_forwarding--;
 810
 811        if (v->flags & MIFF_REGISTER)
 812                unregister_netdevice_queue(dev, head);
 813
 814        dev_put(dev);
 815        return 0;
 816}
 817
 818static inline void ip6mr_cache_free(struct mfc6_cache *c)
 819{
 820        kmem_cache_free(mrt_cachep, c);
 821}
 822
 823/* Destroy an unresolved cache entry, killing queued skbs
 824   and reporting error to netlink readers.
 825 */
 826
 827static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 828{
 829        struct net *net = read_pnet(&mrt->net);
 830        struct sk_buff *skb;
 831
 832        atomic_dec(&mrt->cache_resolve_queue_len);
 833
 834        while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 835                if (ipv6_hdr(skb)->version == 0) {
 836                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 837                        nlh->nlmsg_type = NLMSG_ERROR;
 838                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 839                        skb_trim(skb, nlh->nlmsg_len);
 840                        ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
 841                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 842                } else
 843                        kfree_skb(skb);
 844        }
 845
 846        ip6mr_cache_free(c);
 847}
 848
 849
 850/* Timer process for all the unresolved queue. */
 851
 852static void ipmr_do_expire_process(struct mr6_table *mrt)
 853{
 854        unsigned long now = jiffies;
 855        unsigned long expires = 10 * HZ;
 856        struct mfc6_cache *c, *next;
 857
 858        list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 859                if (time_after(c->mfc_un.unres.expires, now)) {
 860                        /* not yet... */
 861                        unsigned long interval = c->mfc_un.unres.expires - now;
 862                        if (interval < expires)
 863                                expires = interval;
 864                        continue;
 865                }
 866
 867                list_del(&c->list);
 868                ip6mr_destroy_unres(mrt, c);
 869        }
 870
 871        if (!list_empty(&mrt->mfc6_unres_queue))
 872                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 873}
 874
 875static void ipmr_expire_process(unsigned long arg)
 876{
 877        struct mr6_table *mrt = (struct mr6_table *)arg;
 878
 879        if (!spin_trylock(&mfc_unres_lock)) {
 880                mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 881                return;
 882        }
 883
 884        if (!list_empty(&mrt->mfc6_unres_queue))
 885                ipmr_do_expire_process(mrt);
 886
 887        spin_unlock(&mfc_unres_lock);
 888}
 889
 890/* Fill oifs list. It is called under write locked mrt_lock. */
 891
 892static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 893                                    unsigned char *ttls)
 894{
 895        int vifi;
 896
 897        cache->mfc_un.res.minvif = MAXMIFS;
 898        cache->mfc_un.res.maxvif = 0;
 899        memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 900
 901        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
 902                if (MIF_EXISTS(mrt, vifi) &&
 903                    ttls[vifi] && ttls[vifi] < 255) {
 904                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 905                        if (cache->mfc_un.res.minvif > vifi)
 906                                cache->mfc_un.res.minvif = vifi;
 907                        if (cache->mfc_un.res.maxvif <= vifi)
 908                                cache->mfc_un.res.maxvif = vifi + 1;
 909                }
 910        }
 911}
 912
 913static int mif6_add(struct net *net, struct mr6_table *mrt,
 914                    struct mif6ctl *vifc, int mrtsock)
 915{
 916        int vifi = vifc->mif6c_mifi;
 917        struct mif_device *v = &mrt->vif6_table[vifi];
 918        struct net_device *dev;
 919        struct inet6_dev *in6_dev;
 920        int err;
 921
 922        /* Is vif busy ? */
 923        if (MIF_EXISTS(mrt, vifi))
 924                return -EADDRINUSE;
 925
 926        switch (vifc->mif6c_flags) {
 927#ifdef CONFIG_IPV6_PIMSM_V2
 928        case MIFF_REGISTER:
 929                /*
 930                 * Special Purpose VIF in PIM
 931                 * All the packets will be sent to the daemon
 932                 */
 933                if (mrt->mroute_reg_vif_num >= 0)
 934                        return -EADDRINUSE;
 935                dev = ip6mr_reg_vif(net, mrt);
 936                if (!dev)
 937                        return -ENOBUFS;
 938                err = dev_set_allmulti(dev, 1);
 939                if (err) {
 940                        unregister_netdevice(dev);
 941                        dev_put(dev);
 942                        return err;
 943                }
 944                break;
 945#endif
 946        case 0:
 947                dev = dev_get_by_index(net, vifc->mif6c_pifi);
 948                if (!dev)
 949                        return -EADDRNOTAVAIL;
 950                err = dev_set_allmulti(dev, 1);
 951                if (err) {
 952                        dev_put(dev);
 953                        return err;
 954                }
 955                break;
 956        default:
 957                return -EINVAL;
 958        }
 959
 960        in6_dev = __in6_dev_get(dev);
 961        if (in6_dev)
 962                in6_dev->cnf.mc_forwarding++;
 963
 964        /*
 965         *      Fill in the VIF structures
 966         */
 967        v->rate_limit = vifc->vifc_rate_limit;
 968        v->flags = vifc->mif6c_flags;
 969        if (!mrtsock)
 970                v->flags |= VIFF_STATIC;
 971        v->threshold = vifc->vifc_threshold;
 972        v->bytes_in = 0;
 973        v->bytes_out = 0;
 974        v->pkt_in = 0;
 975        v->pkt_out = 0;
 976        v->link = dev->ifindex;
 977        if (v->flags & MIFF_REGISTER)
 978                v->link = dev->iflink;
 979
 980        /* And finish update writing critical data */
 981        write_lock_bh(&mrt_lock);
 982        v->dev = dev;
 983#ifdef CONFIG_IPV6_PIMSM_V2
 984        if (v->flags & MIFF_REGISTER)
 985                mrt->mroute_reg_vif_num = vifi;
 986#endif
 987        if (vifi + 1 > mrt->maxvif)
 988                mrt->maxvif = vifi + 1;
 989        write_unlock_bh(&mrt_lock);
 990        return 0;
 991}
 992
 993static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 994                                           const struct in6_addr *origin,
 995                                           const struct in6_addr *mcastgrp)
 996{
 997        int line = MFC6_HASH(mcastgrp, origin);
 998        struct mfc6_cache *c;
 999
1000        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1001                if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1002                    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1003                        return c;
1004        }
1005        return NULL;
1006}
1007
1008/*
1009 *      Allocate a multicast cache entry
1010 */
1011static struct mfc6_cache *ip6mr_cache_alloc(void)
1012{
1013        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1014        if (c == NULL)
1015                return NULL;
1016        c->mfc_un.res.minvif = MAXMIFS;
1017        return c;
1018}
1019
1020static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1021{
1022        struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1023        if (c == NULL)
1024                return NULL;
1025        skb_queue_head_init(&c->mfc_un.unres.unresolved);
1026        c->mfc_un.unres.expires = jiffies + 10 * HZ;
1027        return c;
1028}
1029
1030/*
1031 *      A cache entry has gone into a resolved state from queued
1032 */
1033
1034static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1035                                struct mfc6_cache *uc, struct mfc6_cache *c)
1036{
1037        struct sk_buff *skb;
1038
1039        /*
1040         *      Play the pending entries through our router
1041         */
1042
1043        while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1044                if (ipv6_hdr(skb)->version == 0) {
1045                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1046
1047                        if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1048                                nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1049                        } else {
1050                                nlh->nlmsg_type = NLMSG_ERROR;
1051                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1052                                skb_trim(skb, nlh->nlmsg_len);
1053                                ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1054                        }
1055                        rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1056                } else
1057                        ip6_mr_forward(net, mrt, skb, c);
1058        }
1059}
1060
1061/*
1062 *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1063 *      expects the following bizarre scheme.
1064 *
1065 *      Called under mrt_lock.
1066 */
1067
1068static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1069                              mifi_t mifi, int assert)
1070{
1071        struct sk_buff *skb;
1072        struct mrt6msg *msg;
1073        int ret;
1074
1075#ifdef CONFIG_IPV6_PIMSM_V2
1076        if (assert == MRT6MSG_WHOLEPKT)
1077                skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1078                                                +sizeof(*msg));
1079        else
1080#endif
1081                skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1082
1083        if (!skb)
1084                return -ENOBUFS;
1085
1086        /* I suppose that internal messages
1087         * do not require checksums */
1088
1089        skb->ip_summed = CHECKSUM_UNNECESSARY;
1090
1091#ifdef CONFIG_IPV6_PIMSM_V2
1092        if (assert == MRT6MSG_WHOLEPKT) {
1093                /* Ugly, but we have no choice with this interface.
1094                   Duplicate old header, fix length etc.
1095                   And all this only to mangle msg->im6_msgtype and
1096                   to set msg->im6_mbz to "mbz" :-)
1097                 */
1098                skb_push(skb, -skb_network_offset(pkt));
1099
1100                skb_push(skb, sizeof(*msg));
1101                skb_reset_transport_header(skb);
1102                msg = (struct mrt6msg *)skb_transport_header(skb);
1103                msg->im6_mbz = 0;
1104                msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1105                msg->im6_mif = mrt->mroute_reg_vif_num;
1106                msg->im6_pad = 0;
1107                msg->im6_src = ipv6_hdr(pkt)->saddr;
1108                msg->im6_dst = ipv6_hdr(pkt)->daddr;
1109
1110                skb->ip_summed = CHECKSUM_UNNECESSARY;
1111        } else
1112#endif
1113        {
1114        /*
1115         *      Copy the IP header
1116         */
1117
1118        skb_put(skb, sizeof(struct ipv6hdr));
1119        skb_reset_network_header(skb);
1120        skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1121
1122        /*
1123         *      Add our header
1124         */
1125        skb_put(skb, sizeof(*msg));
1126        skb_reset_transport_header(skb);
1127        msg = (struct mrt6msg *)skb_transport_header(skb);
1128
1129        msg->im6_mbz = 0;
1130        msg->im6_msgtype = assert;
1131        msg->im6_mif = mifi;
1132        msg->im6_pad = 0;
1133        msg->im6_src = ipv6_hdr(pkt)->saddr;
1134        msg->im6_dst = ipv6_hdr(pkt)->daddr;
1135
1136        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1137        skb->ip_summed = CHECKSUM_UNNECESSARY;
1138        }
1139
1140        if (mrt->mroute6_sk == NULL) {
1141                kfree_skb(skb);
1142                return -EINVAL;
1143        }
1144
1145        /*
1146         *      Deliver to user space multicast routing algorithms
1147         */
1148        ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1149        if (ret < 0) {
1150                net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1151                kfree_skb(skb);
1152        }
1153
1154        return ret;
1155}
1156
1157/*
1158 *      Queue a packet for resolution. It gets locked cache entry!
1159 */
1160
1161static int
1162ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1163{
1164        bool found = false;
1165        int err;
1166        struct mfc6_cache *c;
1167
1168        spin_lock_bh(&mfc_unres_lock);
1169        list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1170                if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1171                    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1172                        found = true;
1173                        break;
1174                }
1175        }
1176
1177        if (!found) {
1178                /*
1179                 *      Create a new entry if allowable
1180                 */
1181
1182                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1183                    (c = ip6mr_cache_alloc_unres()) == NULL) {
1184                        spin_unlock_bh(&mfc_unres_lock);
1185
1186                        kfree_skb(skb);
1187                        return -ENOBUFS;
1188                }
1189
1190                /*
1191                 *      Fill in the new cache entry
1192                 */
1193                c->mf6c_parent = -1;
1194                c->mf6c_origin = ipv6_hdr(skb)->saddr;
1195                c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1196
1197                /*
1198                 *      Reflect first query at pim6sd
1199                 */
1200                err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1201                if (err < 0) {
1202                        /* If the report failed throw the cache entry
1203                           out - Brad Parker
1204                         */
1205                        spin_unlock_bh(&mfc_unres_lock);
1206
1207                        ip6mr_cache_free(c);
1208                        kfree_skb(skb);
1209                        return err;
1210                }
1211
1212                atomic_inc(&mrt->cache_resolve_queue_len);
1213                list_add(&c->list, &mrt->mfc6_unres_queue);
1214
1215                ipmr_do_expire_process(mrt);
1216        }
1217
1218        /*
1219         *      See if we can append the packet
1220         */
1221        if (c->mfc_un.unres.unresolved.qlen > 3) {
1222                kfree_skb(skb);
1223                err = -ENOBUFS;
1224        } else {
1225                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1226                err = 0;
1227        }
1228
1229        spin_unlock_bh(&mfc_unres_lock);
1230        return err;
1231}
1232
1233/*
1234 *      MFC6 cache manipulation by user space
1235 */
1236
1237static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
1238{
1239        int line;
1240        struct mfc6_cache *c, *next;
1241
1242        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1243
1244        list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1245                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1246                    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1247                        write_lock_bh(&mrt_lock);
1248                        list_del(&c->list);
1249                        write_unlock_bh(&mrt_lock);
1250
1251                        ip6mr_cache_free(c);
1252                        return 0;
1253                }
1254        }
1255        return -ENOENT;
1256}
1257
1258static int ip6mr_device_event(struct notifier_block *this,
1259                              unsigned long event, void *ptr)
1260{
1261        struct net_device *dev = ptr;
1262        struct net *net = dev_net(dev);
1263        struct mr6_table *mrt;
1264        struct mif_device *v;
1265        int ct;
1266        LIST_HEAD(list);
1267
1268        if (event != NETDEV_UNREGISTER)
1269                return NOTIFY_DONE;
1270
1271        ip6mr_for_each_table(mrt, net) {
1272                v = &mrt->vif6_table[0];
1273                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1274                        if (v->dev == dev)
1275                                mif6_delete(mrt, ct, &list);
1276                }
1277        }
1278        unregister_netdevice_many(&list);
1279
1280        return NOTIFY_DONE;
1281}
1282
1283static struct notifier_block ip6_mr_notifier = {
1284        .notifier_call = ip6mr_device_event
1285};
1286
1287/*
1288 *      Setup for IP multicast routing
1289 */
1290
1291static int __net_init ip6mr_net_init(struct net *net)
1292{
1293        int err;
1294
1295        err = ip6mr_rules_init(net);
1296        if (err < 0)
1297                goto fail;
1298
1299#ifdef CONFIG_PROC_FS
1300        err = -ENOMEM;
1301        if (!proc_net_fops_create(net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1302                goto proc_vif_fail;
1303        if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1304                goto proc_cache_fail;
1305#endif
1306
1307        return 0;
1308
1309#ifdef CONFIG_PROC_FS
1310proc_cache_fail:
1311        proc_net_remove(net, "ip6_mr_vif");
1312proc_vif_fail:
1313        ip6mr_rules_exit(net);
1314#endif
1315fail:
1316        return err;
1317}
1318
1319static void __net_exit ip6mr_net_exit(struct net *net)
1320{
1321#ifdef CONFIG_PROC_FS
1322        proc_net_remove(net, "ip6_mr_cache");
1323        proc_net_remove(net, "ip6_mr_vif");
1324#endif
1325        ip6mr_rules_exit(net);
1326}
1327
1328static struct pernet_operations ip6mr_net_ops = {
1329        .init = ip6mr_net_init,
1330        .exit = ip6mr_net_exit,
1331};
1332
1333int __init ip6_mr_init(void)
1334{
1335        int err;
1336
1337        mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1338                                       sizeof(struct mfc6_cache),
1339                                       0, SLAB_HWCACHE_ALIGN,
1340                                       NULL);
1341        if (!mrt_cachep)
1342                return -ENOMEM;
1343
1344        err = register_pernet_subsys(&ip6mr_net_ops);
1345        if (err)
1346                goto reg_pernet_fail;
1347
1348        err = register_netdevice_notifier(&ip6_mr_notifier);
1349        if (err)
1350                goto reg_notif_fail;
1351#ifdef CONFIG_IPV6_PIMSM_V2
1352        if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1353                pr_err("%s: can't add PIM protocol\n", __func__);
1354                err = -EAGAIN;
1355                goto add_proto_fail;
1356        }
1357#endif
1358        rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1359                      ip6mr_rtm_dumproute, NULL);
1360        return 0;
1361#ifdef CONFIG_IPV6_PIMSM_V2
1362add_proto_fail:
1363        unregister_netdevice_notifier(&ip6_mr_notifier);
1364#endif
1365reg_notif_fail:
1366        unregister_pernet_subsys(&ip6mr_net_ops);
1367reg_pernet_fail:
1368        kmem_cache_destroy(mrt_cachep);
1369        return err;
1370}
1371
1372void ip6_mr_cleanup(void)
1373{
1374        unregister_netdevice_notifier(&ip6_mr_notifier);
1375        unregister_pernet_subsys(&ip6mr_net_ops);
1376        kmem_cache_destroy(mrt_cachep);
1377}
1378
1379static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1380                         struct mf6cctl *mfc, int mrtsock)
1381{
1382        bool found = false;
1383        int line;
1384        struct mfc6_cache *uc, *c;
1385        unsigned char ttls[MAXMIFS];
1386        int i;
1387
1388        if (mfc->mf6cc_parent >= MAXMIFS)
1389                return -ENFILE;
1390
1391        memset(ttls, 255, MAXMIFS);
1392        for (i = 0; i < MAXMIFS; i++) {
1393                if (IF_ISSET(i, &mfc->mf6cc_ifset))
1394                        ttls[i] = 1;
1395
1396        }
1397
1398        line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1399
1400        list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1401                if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1402                    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1403                        found = true;
1404                        break;
1405                }
1406        }
1407
1408        if (found) {
1409                write_lock_bh(&mrt_lock);
1410                c->mf6c_parent = mfc->mf6cc_parent;
1411                ip6mr_update_thresholds(mrt, c, ttls);
1412                if (!mrtsock)
1413                        c->mfc_flags |= MFC_STATIC;
1414                write_unlock_bh(&mrt_lock);
1415                return 0;
1416        }
1417
1418        if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1419                return -EINVAL;
1420
1421        c = ip6mr_cache_alloc();
1422        if (c == NULL)
1423                return -ENOMEM;
1424
1425        c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1426        c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1427        c->mf6c_parent = mfc->mf6cc_parent;
1428        ip6mr_update_thresholds(mrt, c, ttls);
1429        if (!mrtsock)
1430                c->mfc_flags |= MFC_STATIC;
1431
1432        write_lock_bh(&mrt_lock);
1433        list_add(&c->list, &mrt->mfc6_cache_array[line]);
1434        write_unlock_bh(&mrt_lock);
1435
1436        /*
1437         *      Check to see if we resolved a queued list. If so we
1438         *      need to send on the frames and tidy up.
1439         */
1440        found = false;
1441        spin_lock_bh(&mfc_unres_lock);
1442        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1443                if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1444                    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1445                        list_del(&uc->list);
1446                        atomic_dec(&mrt->cache_resolve_queue_len);
1447                        found = true;
1448                        break;
1449                }
1450        }
1451        if (list_empty(&mrt->mfc6_unres_queue))
1452                del_timer(&mrt->ipmr_expire_timer);
1453        spin_unlock_bh(&mfc_unres_lock);
1454
1455        if (found) {
1456                ip6mr_cache_resolve(net, mrt, uc, c);
1457                ip6mr_cache_free(uc);
1458        }
1459        return 0;
1460}
1461
1462/*
1463 *      Close the multicast socket, and clear the vif tables etc
1464 */
1465
1466static void mroute_clean_tables(struct mr6_table *mrt)
1467{
1468        int i;
1469        LIST_HEAD(list);
1470        struct mfc6_cache *c, *next;
1471
1472        /*
1473         *      Shut down all active vif entries
1474         */
1475        for (i = 0; i < mrt->maxvif; i++) {
1476                if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1477                        mif6_delete(mrt, i, &list);
1478        }
1479        unregister_netdevice_many(&list);
1480
1481        /*
1482         *      Wipe the cache
1483         */
1484        for (i = 0; i < MFC6_LINES; i++) {
1485                list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1486                        if (c->mfc_flags & MFC_STATIC)
1487                                continue;
1488                        write_lock_bh(&mrt_lock);
1489                        list_del(&c->list);
1490                        write_unlock_bh(&mrt_lock);
1491
1492                        ip6mr_cache_free(c);
1493                }
1494        }
1495
1496        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1497                spin_lock_bh(&mfc_unres_lock);
1498                list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1499                        list_del(&c->list);
1500                        ip6mr_destroy_unres(mrt, c);
1501                }
1502                spin_unlock_bh(&mfc_unres_lock);
1503        }
1504}
1505
1506static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1507{
1508        int err = 0;
1509        struct net *net = sock_net(sk);
1510
1511        rtnl_lock();
1512        write_lock_bh(&mrt_lock);
1513        if (likely(mrt->mroute6_sk == NULL)) {
1514                mrt->mroute6_sk = sk;
1515                net->ipv6.devconf_all->mc_forwarding++;
1516        }
1517        else
1518                err = -EADDRINUSE;
1519        write_unlock_bh(&mrt_lock);
1520
1521        rtnl_unlock();
1522
1523        return err;
1524}
1525
1526int ip6mr_sk_done(struct sock *sk)
1527{
1528        int err = -EACCES;
1529        struct net *net = sock_net(sk);
1530        struct mr6_table *mrt;
1531
1532        rtnl_lock();
1533        ip6mr_for_each_table(mrt, net) {
1534                if (sk == mrt->mroute6_sk) {
1535                        write_lock_bh(&mrt_lock);
1536                        mrt->mroute6_sk = NULL;
1537                        net->ipv6.devconf_all->mc_forwarding--;
1538                        write_unlock_bh(&mrt_lock);
1539
1540                        mroute_clean_tables(mrt);
1541                        err = 0;
1542                        break;
1543                }
1544        }
1545        rtnl_unlock();
1546
1547        return err;
1548}
1549
1550struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1551{
1552        struct mr6_table *mrt;
1553        struct flowi6 fl6 = {
1554                .flowi6_iif     = skb->skb_iif,
1555                .flowi6_oif     = skb->dev->ifindex,
1556                .flowi6_mark    = skb->mark,
1557        };
1558
1559        if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1560                return NULL;
1561
1562        return mrt->mroute6_sk;
1563}
1564
1565/*
1566 *      Socket options and virtual interface manipulation. The whole
1567 *      virtual interface system is a complete heap, but unfortunately
1568 *      that's how BSD mrouted happens to think. Maybe one day with a proper
1569 *      MOSPF/PIM router set up we can clean this up.
1570 */
1571
1572int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1573{
1574        int ret;
1575        struct mif6ctl vif;
1576        struct mf6cctl mfc;
1577        mifi_t mifi;
1578        struct net *net = sock_net(sk);
1579        struct mr6_table *mrt;
1580
1581        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1582        if (mrt == NULL)
1583                return -ENOENT;
1584
1585        if (optname != MRT6_INIT) {
1586                if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1587                        return -EACCES;
1588        }
1589
1590        switch (optname) {
1591        case MRT6_INIT:
1592                if (sk->sk_type != SOCK_RAW ||
1593                    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1594                        return -EOPNOTSUPP;
1595                if (optlen < sizeof(int))
1596                        return -EINVAL;
1597
1598                return ip6mr_sk_init(mrt, sk);
1599
1600        case MRT6_DONE:
1601                return ip6mr_sk_done(sk);
1602
1603        case MRT6_ADD_MIF:
1604                if (optlen < sizeof(vif))
1605                        return -EINVAL;
1606                if (copy_from_user(&vif, optval, sizeof(vif)))
1607                        return -EFAULT;
1608                if (vif.mif6c_mifi >= MAXMIFS)
1609                        return -ENFILE;
1610                rtnl_lock();
1611                ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1612                rtnl_unlock();
1613                return ret;
1614
1615        case MRT6_DEL_MIF:
1616                if (optlen < sizeof(mifi_t))
1617                        return -EINVAL;
1618                if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1619                        return -EFAULT;
1620                rtnl_lock();
1621                ret = mif6_delete(mrt, mifi, NULL);
1622                rtnl_unlock();
1623                return ret;
1624
1625        /*
1626         *      Manipulate the forwarding caches. These live
1627         *      in a sort of kernel/user symbiosis.
1628         */
1629        case MRT6_ADD_MFC:
1630        case MRT6_DEL_MFC:
1631                if (optlen < sizeof(mfc))
1632                        return -EINVAL;
1633                if (copy_from_user(&mfc, optval, sizeof(mfc)))
1634                        return -EFAULT;
1635                rtnl_lock();
1636                if (optname == MRT6_DEL_MFC)
1637                        ret = ip6mr_mfc_delete(mrt, &mfc);
1638                else
1639                        ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1640                rtnl_unlock();
1641                return ret;
1642
1643        /*
1644         *      Control PIM assert (to activate pim will activate assert)
1645         */
1646        case MRT6_ASSERT:
1647        {
1648                int v;
1649                if (get_user(v, (int __user *)optval))
1650                        return -EFAULT;
1651                mrt->mroute_do_assert = !!v;
1652                return 0;
1653        }
1654
1655#ifdef CONFIG_IPV6_PIMSM_V2
1656        case MRT6_PIM:
1657        {
1658                int v;
1659                if (get_user(v, (int __user *)optval))
1660                        return -EFAULT;
1661                v = !!v;
1662                rtnl_lock();
1663                ret = 0;
1664                if (v != mrt->mroute_do_pim) {
1665                        mrt->mroute_do_pim = v;
1666                        mrt->mroute_do_assert = v;
1667                }
1668                rtnl_unlock();
1669                return ret;
1670        }
1671
1672#endif
1673#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1674        case MRT6_TABLE:
1675        {
1676                u32 v;
1677
1678                if (optlen != sizeof(u32))
1679                        return -EINVAL;
1680                if (get_user(v, (u32 __user *)optval))
1681                        return -EFAULT;
1682                if (sk == mrt->mroute6_sk)
1683                        return -EBUSY;
1684
1685                rtnl_lock();
1686                ret = 0;
1687                if (!ip6mr_new_table(net, v))
1688                        ret = -ENOMEM;
1689                raw6_sk(sk)->ip6mr_table = v;
1690                rtnl_unlock();
1691                return ret;
1692        }
1693#endif
1694        /*
1695         *      Spurious command, or MRT6_VERSION which you cannot
1696         *      set.
1697         */
1698        default:
1699                return -ENOPROTOOPT;
1700        }
1701}
1702
1703/*
1704 *      Getsock opt support for the multicast routing system.
1705 */
1706
1707int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1708                          int __user *optlen)
1709{
1710        int olr;
1711        int val;
1712        struct net *net = sock_net(sk);
1713        struct mr6_table *mrt;
1714
1715        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1716        if (mrt == NULL)
1717                return -ENOENT;
1718
1719        switch (optname) {
1720        case MRT6_VERSION:
1721                val = 0x0305;
1722                break;
1723#ifdef CONFIG_IPV6_PIMSM_V2
1724        case MRT6_PIM:
1725                val = mrt->mroute_do_pim;
1726                break;
1727#endif
1728        case MRT6_ASSERT:
1729                val = mrt->mroute_do_assert;
1730                break;
1731        default:
1732                return -ENOPROTOOPT;
1733        }
1734
1735        if (get_user(olr, optlen))
1736                return -EFAULT;
1737
1738        olr = min_t(int, olr, sizeof(int));
1739        if (olr < 0)
1740                return -EINVAL;
1741
1742        if (put_user(olr, optlen))
1743                return -EFAULT;
1744        if (copy_to_user(optval, &val, olr))
1745                return -EFAULT;
1746        return 0;
1747}
1748
1749/*
1750 *      The IP multicast ioctl support routines.
1751 */
1752
1753int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1754{
1755        struct sioc_sg_req6 sr;
1756        struct sioc_mif_req6 vr;
1757        struct mif_device *vif;
1758        struct mfc6_cache *c;
1759        struct net *net = sock_net(sk);
1760        struct mr6_table *mrt;
1761
1762        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1763        if (mrt == NULL)
1764                return -ENOENT;
1765
1766        switch (cmd) {
1767        case SIOCGETMIFCNT_IN6:
1768                if (copy_from_user(&vr, arg, sizeof(vr)))
1769                        return -EFAULT;
1770                if (vr.mifi >= mrt->maxvif)
1771                        return -EINVAL;
1772                read_lock(&mrt_lock);
1773                vif = &mrt->vif6_table[vr.mifi];
1774                if (MIF_EXISTS(mrt, vr.mifi)) {
1775                        vr.icount = vif->pkt_in;
1776                        vr.ocount = vif->pkt_out;
1777                        vr.ibytes = vif->bytes_in;
1778                        vr.obytes = vif->bytes_out;
1779                        read_unlock(&mrt_lock);
1780
1781                        if (copy_to_user(arg, &vr, sizeof(vr)))
1782                                return -EFAULT;
1783                        return 0;
1784                }
1785                read_unlock(&mrt_lock);
1786                return -EADDRNOTAVAIL;
1787        case SIOCGETSGCNT_IN6:
1788                if (copy_from_user(&sr, arg, sizeof(sr)))
1789                        return -EFAULT;
1790
1791                read_lock(&mrt_lock);
1792                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1793                if (c) {
1794                        sr.pktcnt = c->mfc_un.res.pkt;
1795                        sr.bytecnt = c->mfc_un.res.bytes;
1796                        sr.wrong_if = c->mfc_un.res.wrong_if;
1797                        read_unlock(&mrt_lock);
1798
1799                        if (copy_to_user(arg, &sr, sizeof(sr)))
1800                                return -EFAULT;
1801                        return 0;
1802                }
1803                read_unlock(&mrt_lock);
1804                return -EADDRNOTAVAIL;
1805        default:
1806                return -ENOIOCTLCMD;
1807        }
1808}
1809
1810#ifdef CONFIG_COMPAT
1811struct compat_sioc_sg_req6 {
1812        struct sockaddr_in6 src;
1813        struct sockaddr_in6 grp;
1814        compat_ulong_t pktcnt;
1815        compat_ulong_t bytecnt;
1816        compat_ulong_t wrong_if;
1817};
1818
1819struct compat_sioc_mif_req6 {
1820        mifi_t  mifi;
1821        compat_ulong_t icount;
1822        compat_ulong_t ocount;
1823        compat_ulong_t ibytes;
1824        compat_ulong_t obytes;
1825};
1826
1827int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1828{
1829        struct compat_sioc_sg_req6 sr;
1830        struct compat_sioc_mif_req6 vr;
1831        struct mif_device *vif;
1832        struct mfc6_cache *c;
1833        struct net *net = sock_net(sk);
1834        struct mr6_table *mrt;
1835
1836        mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1837        if (mrt == NULL)
1838                return -ENOENT;
1839
1840        switch (cmd) {
1841        case SIOCGETMIFCNT_IN6:
1842                if (copy_from_user(&vr, arg, sizeof(vr)))
1843                        return -EFAULT;
1844                if (vr.mifi >= mrt->maxvif)
1845                        return -EINVAL;
1846                read_lock(&mrt_lock);
1847                vif = &mrt->vif6_table[vr.mifi];
1848                if (MIF_EXISTS(mrt, vr.mifi)) {
1849                        vr.icount = vif->pkt_in;
1850                        vr.ocount = vif->pkt_out;
1851                        vr.ibytes = vif->bytes_in;
1852                        vr.obytes = vif->bytes_out;
1853                        read_unlock(&mrt_lock);
1854
1855                        if (copy_to_user(arg, &vr, sizeof(vr)))
1856                                return -EFAULT;
1857                        return 0;
1858                }
1859                read_unlock(&mrt_lock);
1860                return -EADDRNOTAVAIL;
1861        case SIOCGETSGCNT_IN6:
1862                if (copy_from_user(&sr, arg, sizeof(sr)))
1863                        return -EFAULT;
1864
1865                read_lock(&mrt_lock);
1866                c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1867                if (c) {
1868                        sr.pktcnt = c->mfc_un.res.pkt;
1869                        sr.bytecnt = c->mfc_un.res.bytes;
1870                        sr.wrong_if = c->mfc_un.res.wrong_if;
1871                        read_unlock(&mrt_lock);
1872
1873                        if (copy_to_user(arg, &sr, sizeof(sr)))
1874                                return -EFAULT;
1875                        return 0;
1876                }
1877                read_unlock(&mrt_lock);
1878                return -EADDRNOTAVAIL;
1879        default:
1880                return -ENOIOCTLCMD;
1881        }
1882}
1883#endif
1884
1885static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1886{
1887        IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1888                         IPSTATS_MIB_OUTFORWDATAGRAMS);
1889        IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1890                         IPSTATS_MIB_OUTOCTETS, skb->len);
1891        return dst_output(skb);
1892}
1893
1894/*
1895 *      Processing handlers for ip6mr_forward
1896 */
1897
1898static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1899                          struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1900{
1901        struct ipv6hdr *ipv6h;
1902        struct mif_device *vif = &mrt->vif6_table[vifi];
1903        struct net_device *dev;
1904        struct dst_entry *dst;
1905        struct flowi6 fl6;
1906
1907        if (vif->dev == NULL)
1908                goto out_free;
1909
1910#ifdef CONFIG_IPV6_PIMSM_V2
1911        if (vif->flags & MIFF_REGISTER) {
1912                vif->pkt_out++;
1913                vif->bytes_out += skb->len;
1914                vif->dev->stats.tx_bytes += skb->len;
1915                vif->dev->stats.tx_packets++;
1916                ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1917                goto out_free;
1918        }
1919#endif
1920
1921        ipv6h = ipv6_hdr(skb);
1922
1923        fl6 = (struct flowi6) {
1924                .flowi6_oif = vif->link,
1925                .daddr = ipv6h->daddr,
1926        };
1927
1928        dst = ip6_route_output(net, NULL, &fl6);
1929        if (dst->error) {
1930                dst_release(dst);
1931                goto out_free;
1932        }
1933
1934        skb_dst_drop(skb);
1935        skb_dst_set(skb, dst);
1936
1937        /*
1938         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1939         * not only before forwarding, but after forwarding on all output
1940         * interfaces. It is clear, if mrouter runs a multicasting
1941         * program, it should receive packets not depending to what interface
1942         * program is joined.
1943         * If we will not make it, the program will have to join on all
1944         * interfaces. On the other hand, multihoming host (or router, but
1945         * not mrouter) cannot join to more than one interface - it will
1946         * result in receiving multiple packets.
1947         */
1948        dev = vif->dev;
1949        skb->dev = dev;
1950        vif->pkt_out++;
1951        vif->bytes_out += skb->len;
1952
1953        /* We are about to write */
1954        /* XXX: extension headers? */
1955        if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1956                goto out_free;
1957
1958        ipv6h = ipv6_hdr(skb);
1959        ipv6h->hop_limit--;
1960
1961        IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1962
1963        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1964                       ip6mr_forward2_finish);
1965
1966out_free:
1967        kfree_skb(skb);
1968        return 0;
1969}
1970
1971static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1972{
1973        int ct;
1974
1975        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1976                if (mrt->vif6_table[ct].dev == dev)
1977                        break;
1978        }
1979        return ct;
1980}
1981
1982static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1983                          struct sk_buff *skb, struct mfc6_cache *cache)
1984{
1985        int psend = -1;
1986        int vif, ct;
1987
1988        vif = cache->mf6c_parent;
1989        cache->mfc_un.res.pkt++;
1990        cache->mfc_un.res.bytes += skb->len;
1991
1992        /*
1993         * Wrong interface: drop packet and (maybe) send PIM assert.
1994         */
1995        if (mrt->vif6_table[vif].dev != skb->dev) {
1996                int true_vifi;
1997
1998                cache->mfc_un.res.wrong_if++;
1999                true_vifi = ip6mr_find_vif(mrt, skb->dev);
2000
2001                if (true_vifi >= 0 && mrt->mroute_do_assert &&
2002                    /* pimsm uses asserts, when switching from RPT to SPT,
2003                       so that we cannot check that packet arrived on an oif.
2004                       It is bad, but otherwise we would need to move pretty
2005                       large chunk of pimd to kernel. Ough... --ANK
2006                     */
2007                    (mrt->mroute_do_pim ||
2008                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
2009                    time_after(jiffies,
2010                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2011                        cache->mfc_un.res.last_assert = jiffies;
2012                        ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2013                }
2014                goto dont_forward;
2015        }
2016
2017        mrt->vif6_table[vif].pkt_in++;
2018        mrt->vif6_table[vif].bytes_in += skb->len;
2019
2020        /*
2021         *      Forward the frame
2022         */
2023        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2024                if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2025                        if (psend != -1) {
2026                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2027                                if (skb2)
2028                                        ip6mr_forward2(net, mrt, skb2, cache, psend);
2029                        }
2030                        psend = ct;
2031                }
2032        }
2033        if (psend != -1) {
2034                ip6mr_forward2(net, mrt, skb, cache, psend);
2035                return 0;
2036        }
2037
2038dont_forward:
2039        kfree_skb(skb);
2040        return 0;
2041}
2042
2043
2044/*
2045 *      Multicast packets for forwarding arrive here
2046 */
2047
2048int ip6_mr_input(struct sk_buff *skb)
2049{
2050        struct mfc6_cache *cache;
2051        struct net *net = dev_net(skb->dev);
2052        struct mr6_table *mrt;
2053        struct flowi6 fl6 = {
2054                .flowi6_iif     = skb->dev->ifindex,
2055                .flowi6_mark    = skb->mark,
2056        };
2057        int err;
2058
2059        err = ip6mr_fib_lookup(net, &fl6, &mrt);
2060        if (err < 0) {
2061                kfree_skb(skb);
2062                return err;
2063        }
2064
2065        read_lock(&mrt_lock);
2066        cache = ip6mr_cache_find(mrt,
2067                                 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2068
2069        /*
2070         *      No usable cache entry
2071         */
2072        if (cache == NULL) {
2073                int vif;
2074
2075                vif = ip6mr_find_vif(mrt, skb->dev);
2076                if (vif >= 0) {
2077                        int err = ip6mr_cache_unresolved(mrt, vif, skb);
2078                        read_unlock(&mrt_lock);
2079
2080                        return err;
2081                }
2082                read_unlock(&mrt_lock);
2083                kfree_skb(skb);
2084                return -ENODEV;
2085        }
2086
2087        ip6_mr_forward(net, mrt, skb, cache);
2088
2089        read_unlock(&mrt_lock);
2090
2091        return 0;
2092}
2093
2094
2095static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2096                               struct mfc6_cache *c, struct rtmsg *rtm)
2097{
2098        int ct;
2099        struct rtnexthop *nhp;
2100        u8 *b = skb_tail_pointer(skb);
2101        struct rtattr *mp_head;
2102
2103        /* If cache is unresolved, don't try to parse IIF and OIF */
2104        if (c->mf6c_parent >= MAXMIFS)
2105                return -ENOENT;
2106
2107        if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2108            nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2109                return -EMSGSIZE;
2110
2111        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
2112
2113        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2114                if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2115                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
2116                                goto rtattr_failure;
2117                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
2118                        nhp->rtnh_flags = 0;
2119                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2120                        nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2121                        nhp->rtnh_len = sizeof(*nhp);
2122                }
2123        }
2124        mp_head->rta_type = RTA_MULTIPATH;
2125        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
2126        rtm->rtm_type = RTN_MULTICAST;
2127        return 1;
2128
2129rtattr_failure:
2130        nlmsg_trim(skb, b);
2131        return -EMSGSIZE;
2132}
2133
2134int ip6mr_get_route(struct net *net,
2135                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2136{
2137        int err;
2138        struct mr6_table *mrt;
2139        struct mfc6_cache *cache;
2140        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2141
2142        mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2143        if (mrt == NULL)
2144                return -ENOENT;
2145
2146        read_lock(&mrt_lock);
2147        cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2148
2149        if (!cache) {
2150                struct sk_buff *skb2;
2151                struct ipv6hdr *iph;
2152                struct net_device *dev;
2153                int vif;
2154
2155                if (nowait) {
2156                        read_unlock(&mrt_lock);
2157                        return -EAGAIN;
2158                }
2159
2160                dev = skb->dev;
2161                if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2162                        read_unlock(&mrt_lock);
2163                        return -ENODEV;
2164                }
2165
2166                /* really correct? */
2167                skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2168                if (!skb2) {
2169                        read_unlock(&mrt_lock);
2170                        return -ENOMEM;
2171                }
2172
2173                skb_reset_transport_header(skb2);
2174
2175                skb_put(skb2, sizeof(struct ipv6hdr));
2176                skb_reset_network_header(skb2);
2177
2178                iph = ipv6_hdr(skb2);
2179                iph->version = 0;
2180                iph->priority = 0;
2181                iph->flow_lbl[0] = 0;
2182                iph->flow_lbl[1] = 0;
2183                iph->flow_lbl[2] = 0;
2184                iph->payload_len = 0;
2185                iph->nexthdr = IPPROTO_NONE;
2186                iph->hop_limit = 0;
2187                iph->saddr = rt->rt6i_src.addr;
2188                iph->daddr = rt->rt6i_dst.addr;
2189
2190                err = ip6mr_cache_unresolved(mrt, vif, skb2);
2191                read_unlock(&mrt_lock);
2192
2193                return err;
2194        }
2195
2196        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2197                cache->mfc_flags |= MFC_NOTIFY;
2198
2199        err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2200        read_unlock(&mrt_lock);
2201        return err;
2202}
2203
2204static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2205                             u32 portid, u32 seq, struct mfc6_cache *c)
2206{
2207        struct nlmsghdr *nlh;
2208        struct rtmsg *rtm;
2209
2210        nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2211        if (nlh == NULL)
2212                return -EMSGSIZE;
2213
2214        rtm = nlmsg_data(nlh);
2215        rtm->rtm_family   = RTNL_FAMILY_IPMR;
2216        rtm->rtm_dst_len  = 128;
2217        rtm->rtm_src_len  = 128;
2218        rtm->rtm_tos      = 0;
2219        rtm->rtm_table    = mrt->id;
2220        if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2221                goto nla_put_failure;
2222        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2223        rtm->rtm_protocol = RTPROT_UNSPEC;
2224        rtm->rtm_flags    = 0;
2225
2226        if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2227            nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2228                goto nla_put_failure;
2229        if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2230                goto nla_put_failure;
2231
2232        return nlmsg_end(skb, nlh);
2233
2234nla_put_failure:
2235        nlmsg_cancel(skb, nlh);
2236        return -EMSGSIZE;
2237}
2238
2239static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2240{
2241        struct net *net = sock_net(skb->sk);
2242        struct mr6_table *mrt;
2243        struct mfc6_cache *mfc;
2244        unsigned int t = 0, s_t;
2245        unsigned int h = 0, s_h;
2246        unsigned int e = 0, s_e;
2247
2248        s_t = cb->args[0];
2249        s_h = cb->args[1];
2250        s_e = cb->args[2];
2251
2252        read_lock(&mrt_lock);
2253        ip6mr_for_each_table(mrt, net) {
2254                if (t < s_t)
2255                        goto next_table;
2256                if (t > s_t)
2257                        s_h = 0;
2258                for (h = s_h; h < MFC6_LINES; h++) {
2259                        list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2260                                if (e < s_e)
2261                                        goto next_entry;
2262                                if (ip6mr_fill_mroute(mrt, skb,
2263                                                      NETLINK_CB(cb->skb).portid,
2264                                                      cb->nlh->nlmsg_seq,
2265                                                      mfc) < 0)
2266                                        goto done;
2267next_entry:
2268                                e++;
2269                        }
2270                        e = s_e = 0;
2271                }
2272                s_h = 0;
2273next_table:
2274                t++;
2275        }
2276done:
2277        read_unlock(&mrt_lock);
2278
2279        cb->args[2] = e;
2280        cb->args[1] = h;
2281        cb->args[0] = t;
2282
2283        return skb->len;
2284}
2285
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.