linux/net/ipv4/ipmr.c
/*
 *      IP multicast routing support for mrouted 3.6/3.8
 *
 *              (c) 1995 Alan Cox, <alan@redhat.com>
 *        Linux Consultancy and Custom Driver Development
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *      Fixes:
 *      Michael Chastain        :       Incorrect size of copying.
 *      Alan Cox                :       Added the cache manager code
 *      Alan Cox                :       Fixed the clone/copy bug and device race.
 *      Mike McLagan            :       Routing by source
 *      Malcolm Beattie         :       Buffer handling fixes.
 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 *      SVR Anand               :       Fixed several multicast bugs and problems.
 *      Alexey Kuznetsov        :       Status, optimisations and more.
 *      Brad Parker             :       Better behaviour on mrouted upcall
 *                                      overflow.
 *      Carlos Picoto           :       PIMv1 Support
 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 *                                      Relax this requirement to work with older peers.
 *
 */

#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *      Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];            /* Devices              */
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

static int mroute_do_assert;                            /* Set in PIM assert    */
static int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */

static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
static atomic_t cache_resolve_queue_len;                /* Size of unresolved   */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries
   is protected with the strong spinlock mfc_unres_lock.

   In this case the data path is entirely free of exclusive locks.
 */
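
/*
 * A minimal sketch of the resulting discipline (illustrative only,
 * using the locks and helpers defined in this file):
 */
#if 0
/* data path, possibly softirq context: readers only, no exclusive locks */
read_lock(&mrt_lock);
cache = ipmr_cache_find(iph->saddr, iph->daddr);
/* ... forward using cache ... */
read_unlock(&mrt_lock);

/* control path, process context: the only writer of the resolved table */
write_lock_bh(&mrt_lock);
c->next = mfc_cache_array[line];
mfc_cache_array[line] = c;
write_unlock_bh(&mrt_lock);
#endif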

static struct kmem_cache *mrt_cachep __read_mostly;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
        struct net_device  *dev;

        dev = __dev_get_by_name("tunl0");

        if (dev) {
                int err;
                struct ifreq ifr;
                mm_segment_t    oldfs;
                struct ip_tunnel_parm p;
                struct in_device  *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.version = 4;
                p.iph.ihl = 5;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (void*)&p;

                oldfs = get_fs(); set_fs(KERNEL_DS);
                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                set_fs(oldfs);

                dev = NULL;

                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);
                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
                                goto failure;
                        in_dev->cnf.rp_filter = 0;

                        if (dev_open(dev))
                                goto failure;
                }
        }
        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        read_lock(&mrt_lock);
        ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
        ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
        return (struct net_device_stats*)netdev_priv(dev);
}

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->hard_start_xmit    = reg_vif_xmit;
        dev->get_stats          = reg_vif_get_stats;
        dev->destructor         = free_netdev;
}

static struct net_device *ipmr_reg_vif(void)
{
        struct net_device *dev;
        struct in_device *in_dev;

        dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
                           reg_vif_setup);

        if (dev == NULL)
                return NULL;

        if (register_netdevice(dev)) {
                free_netdev(dev);
                return NULL;
        }
        dev->iflink = 0;

        if ((in_dev = inetdev_init(dev)) == NULL)
                goto failure;

        in_dev->cnf.rp_filter = 0;

        if (dev_open(dev))
                goto failure;

        return dev;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
        return NULL;
}
#endif

/*
 *      Delete a VIF entry
 */

static int vif_delete(int vifi)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= maxvif)
                return -EADDRNOTAVAIL;

        v = &vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == reg_vif_num)
                reg_vif_num = -1;
#endif

        if (vifi+1 == maxvif) {
                int tmp;
                for (tmp=vifi-1; tmp>=0; tmp--) {
                        if (VIF_EXISTS(tmp))
                                break;
                }
                maxvif = tmp+1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                in_dev->cnf.mc_forwarding--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                unregister_netdevice(dev);

        dev_put(dev);
        return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        atomic_dec(&cache_resolve_queue_len);

        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (skb->nh.iph->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

static void ipmr_expire_process(unsigned long dummy)
{
        unsigned long now;
        unsigned long expires;
        struct mfc_cache *c, **cp;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
                return;
        }

        if (atomic_read(&cache_resolve_queue_len) == 0)
                goto out;

        now = jiffies;
        expires = 10*HZ;
        cp = &mfc_unres_queue;

        while ((c=*cp) != NULL) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        cp = &c->next;
                        continue;
                }

                *cp = c->next;

                ipmr_destroy_unres(c);
        }

        if (atomic_read(&cache_resolve_queue_len))
                mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

        for (vifi=0; vifi<maxvif; vifi++) {
                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}
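
/*
 * Worked example (hypothetical values): with maxvif == 4 and
 * ttls == { 2, 255, 0, 1 }, only vifs 0 and 3 qualify (TTL non-zero
 * and below 255), so res.ttls becomes { 2, 255, 255, 1, 255, ... },
 * res.minvif == 0 and res.maxvif == 4, and ip_mr_forward() scans
 * just the window [minvif, maxvif).
 */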

static int vif_add(struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;

        /* Is vif busy ? */
        if (VIF_EXISTS(vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /*
                 * Special Purpose VIF in PIM
                 * All the packets will be sent to the daemon
                 */
                if (reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif();
                if (!dev)
                        return -ENOBUFS;
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(vifc);
                if (!dev)
                        return -ENOBUFS;
                break;
        case 0:
                dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
                if (!dev)
                        return -EADDRNOTAVAIL;
                dev_put(dev);
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
                return -EADDRNOTAVAIL;
        in_dev->cnf.mc_forwarding++;
        dev_set_allmulti(dev, +1);
        ip_rt_multicast_event(in_dev);

        /*
         *      Fill in the VIF structures
         */
        v->rate_limit=vifc->vifc_rate_limit;
        v->local=vifc->vifc_lcl_addr.s_addr;
        v->remote=vifc->vifc_rmt_addr.s_addr;
        v->flags=vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold=vifc->vifc_threshold;
        v->bytes_in = 0;
        v->bytes_out = 0;
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        dev_hold(dev);
        v->dev=dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags&VIFF_REGISTER)
                reg_vif_num = vifi;
#endif
        if (vifi+1 > maxvif)
                maxvif = vifi+1;
        write_unlock_bh(&mrt_lock);
        return 0;
}

static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
{
        int line=MFC_HASH(mcastgrp,origin);
        struct mfc_cache *c;

        for (c=mfc_cache_array[line]; c; c = c->next) {
                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
                        break;
        }
        return c;
}

/*
 *      Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
        if(c==NULL)
                return NULL;
        memset(c, 0, sizeof(*c));
        c->mfc_un.res.minvif = MAXVIFS;
        return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
        if(c==NULL)
                return NULL;
        memset(c, 0, sizeof(*c));
        skb_queue_head_init(&c->mfc_un.unres.unresolved);
        c->mfc_un.unres.expires = jiffies + 10*HZ;
        return c;
}

/*
 *      A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /*
         *      Play the pending entries through our router
         */

        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (skb->nh.iph->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(skb, c, 0);
        }
}

/*
 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *      expects the following bizarre scheme.
 *
 *      Called under mrt_lock.
 */

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        int ihl = pkt->nh.iph->ihl<<2;
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if(!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                   Duplicate old header, fix ihl, length etc.
                   And all this only to mangle msg->im_msgtype and
                   to set msg->im_mbz to "mbz" :-)
                 */
                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
                skb->nh.raw = skb->h.raw = (u8*)msg;
                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = reg_vif_num;
                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
        } else
#endif
        {

        /*
         *      Copy the IP header
         */

        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
        memcpy(skb->data,pkt->data,ihl);
        skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg*)skb->nh.iph;
        msg->im_vif = vifi;
        skb->dst = dst_clone(pkt->dst);

        /*
         *      Add our header
         */

        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      =       0;
        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
        skb->h.raw = skb->nh.raw;
        }

        if (mroute_socket == NULL) {
                kfree_skb(skb);
                return -EINVAL;
        }

        /*
         *      Deliver to mrouted
         */
        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}
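
/*
 * A minimal userspace sketch (illustrative; not lifted from mrouted) of
 * the receiving side of that scheme: the daemon reads the raw IGMP
 * socket it passed to MRT_INIT and tells upcalls apart from ordinary
 * IGMP traffic by im_mbz == 0 (im_mbz overlays the IP protocol byte,
 * which the code above zeroes for upcalls).
 */
#if 0
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

static void handle_upcall(int mrt_sock)
{
	char buf[8192];
	struct igmpmsg msg;
	ssize_t n = recv(mrt_sock, buf, sizeof(buf), 0);

	if (n < (ssize_t)sizeof(msg))
		return;
	memcpy(&msg, buf, sizeof(msg));	/* igmpmsg overlays the IP header */
	if (msg.im_mbz != 0)
		return;			/* ordinary IGMP packet, not an upcall */
	switch (msg.im_msgtype) {
	case IGMPMSG_NOCACHE:		/* resolve (im_src, im_dst), then MRT_ADD_MFC */
	case IGMPMSG_WRONGVIF:		/* arrived on wrong vif: PIM assert handling */
	case IGMPMSG_WHOLEPKT:		/* PIM register vif: encapsulate and send */
		break;
	}
}
#endif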

/*
 *      Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
        int err;
        struct mfc_cache *c;

        spin_lock_bh(&mfc_unres_lock);
        for (c=mfc_unres_queue; c; c=c->next) {
                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
                    c->mfc_origin == skb->nh.iph->saddr)
                        break;
        }

        if (c == NULL) {
                /*
                 *      Create a new entry if allowable
                 */

                if (atomic_read(&cache_resolve_queue_len)>=10 ||
                    (c=ipmr_cache_alloc_unres())==NULL) {
                        spin_unlock_bh(&mfc_unres_lock);

                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /*
                 *      Fill in the new cache entry
                 */
                c->mfc_parent=-1;
                c->mfc_origin=skb->nh.iph->saddr;
                c->mfc_mcastgrp=skb->nh.iph->daddr;

                /*
                 *      Reflect first query at mrouted.
                 */
                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
                        /* If the report failed throw the cache entry
                           out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);

                        kmem_cache_free(mrt_cachep, c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&cache_resolve_queue_len);
                c->next = mfc_unres_queue;
                mfc_unres_queue = c;

                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
        }

        /*
         *      See if we can append the packet
         */
        if (c->mfc_un.unres.unresolved.qlen>3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
                err = 0;
        }

        spin_unlock_bh(&mfc_unres_lock);
        return err;
}

/*
 *      MFC cache manipulation by user space mroute daemon
 */

static int ipmr_mfc_delete(struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, **cp;

        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        kmem_cache_free(mrt_cachep, c);
                        return 0;
                }
        }
        return -ENOENT;
}

static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
        int line;
        struct mfc_cache *uc, *c, **cp;

        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
                        break;
        }

        if (c != NULL) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c=ipmr_cache_alloc();
        if (c==NULL)
                return -ENOMEM;

        c->mfc_origin=mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent=mfc->mfcc_parent;
        ipmr_update_thresholds(c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        write_lock_bh(&mrt_lock);
        c->next = mfc_cache_array[line];
        mfc_cache_array[line] = c;
        write_unlock_bh(&mrt_lock);

        /*
         *      Check to see if we resolved a queued list. If so we
         *      need to send on the frames and tidy up.
         */
        spin_lock_bh(&mfc_unres_lock);
        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
             cp = &uc->next) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        *cp = uc->next;
                        if (atomic_dec_and_test(&cache_resolve_queue_len))
                                del_timer(&ipmr_expire_timer);
                        break;
                }
        }
        spin_unlock_bh(&mfc_unres_lock);

        if (uc) {
                ipmr_cache_resolve(uc, c);
                kmem_cache_free(mrt_cachep, uc);
        }
        return 0;
}

/*
 *      Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
        int i;

        /*
         *      Shut down all active vif entries
         */
        for(i=0; i<maxvif; i++) {
                if (!(vif_table[i].flags&VIFF_STATIC))
                        vif_delete(i);
        }

        /*
         *      Wipe the cache
         */
        for (i=0;i<MFC_LINES;i++) {
                struct mfc_cache *c, **cp;

                cp = &mfc_cache_array[i];
                while ((c = *cp) != NULL) {
                        if (c->mfc_flags&MFC_STATIC) {
                                cp = &c->next;
                                continue;
                        }
                        write_lock_bh(&mrt_lock);
                        *cp = c->next;
                        write_unlock_bh(&mrt_lock);

                        kmem_cache_free(mrt_cachep, c);
                }
        }

        if (atomic_read(&cache_resolve_queue_len) != 0) {
                struct mfc_cache *c;

                spin_lock_bh(&mfc_unres_lock);
                while (mfc_unres_queue != NULL) {
                        c = mfc_unres_queue;
                        mfc_unres_queue = c->next;
                        spin_unlock_bh(&mfc_unres_lock);

                        ipmr_destroy_unres(c);

                        spin_lock_bh(&mfc_unres_lock);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}

static void mrtsock_destruct(struct sock *sk)
{
        rtnl_lock();
        if (sk == mroute_socket) {
                ipv4_devconf.mc_forwarding--;

                write_lock_bh(&mrt_lock);
                mroute_socket=NULL;
                write_unlock_bh(&mrt_lock);

                mroute_clean_tables(sk);
        }
        rtnl_unlock();
}

/*
 *      Socket options and virtual interface manipulation. The whole
 *      virtual interface system is a complete heap, but unfortunately
 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 *      MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;

        if(optname!=MRT_INIT)
        {
                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch(optname)
        {
                case MRT_INIT:
                        if (sk->sk_type != SOCK_RAW ||
                            inet_sk(sk)->num != IPPROTO_IGMP)
                                return -EOPNOTSUPP;
                        if(optlen!=sizeof(int))
                                return -ENOPROTOOPT;

                        rtnl_lock();
                        if (mroute_socket) {
                                rtnl_unlock();
                                return -EADDRINUSE;
                        }

                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
                        if (ret == 0) {
                                write_lock_bh(&mrt_lock);
                                mroute_socket=sk;
                                write_unlock_bh(&mrt_lock);

                                ipv4_devconf.mc_forwarding++;
                        }
                        rtnl_unlock();
                        return ret;
                case MRT_DONE:
                        if (sk!=mroute_socket)
                                return -EACCES;
                        return ip_ra_control(sk, 0, NULL);
                case MRT_ADD_VIF:
                case MRT_DEL_VIF:
                        if(optlen!=sizeof(vif))
                                return -EINVAL;
                        if (copy_from_user(&vif,optval,sizeof(vif)))
                                return -EFAULT;
                        if(vif.vifc_vifi >= MAXVIFS)
                                return -ENFILE;
                        rtnl_lock();
                        if (optname==MRT_ADD_VIF) {
                                ret = vif_add(&vif, sk==mroute_socket);
                        } else {
                                ret = vif_delete(vif.vifc_vifi);
                        }
                        rtnl_unlock();
                        return ret;

                /*
                 *      Manipulate the forwarding caches. These live
                 *      in a sort of kernel/user symbiosis.
                 */
                case MRT_ADD_MFC:
                case MRT_DEL_MFC:
                        if(optlen!=sizeof(mfc))
                                return -EINVAL;
                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
                                return -EFAULT;
                        rtnl_lock();
                        if (optname==MRT_DEL_MFC)
                                ret = ipmr_mfc_delete(&mfc);
                        else
                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
                        rtnl_unlock();
                        return ret;
                /*
                 *      Control PIM assert.
                 */
                case MRT_ASSERT:
                {
                        int v;
                        if(get_user(v,(int __user *)optval))
                                return -EFAULT;
                        mroute_do_assert=(v)?1:0;
                        return 0;
                }
#ifdef CONFIG_IP_PIMSM
                case MRT_PIM:
                {
                        int v, ret;
                        if(get_user(v,(int __user *)optval))
                                return -EFAULT;
                        v = (v)?1:0;
                        rtnl_lock();
                        ret = 0;
                        if (v != mroute_do_pim) {
                                mroute_do_pim = v;
                                mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
                                if (mroute_do_pim)
                                        ret = inet_add_protocol(&pim_protocol,
                                                                IPPROTO_PIM);
                                else
                                        ret = inet_del_protocol(&pim_protocol,
                                                                IPPROTO_PIM);
                                if (ret < 0)
                                        ret = -EAGAIN;
#endif
                        }
                        rtnl_unlock();
                        return ret;
                }
#endif
                /*
                 *      Spurious command, or MRT_VERSION which you cannot
                 *      set.
                 */
                default:
                        return -ENOPROTOOPT;
        }
}
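
/*
 * A minimal sketch (illustrative only; addresses and vif indices are
 * made up) of how a daemon drives this interface: MRT_INIT on a raw
 * IGMP socket, MRT_ADD_VIF per interface, MRT_ADD_MFC per (S,G) learnt
 * from the IGMPMSG_NOCACHE upcall above, MRT_DONE (or close()) to shut
 * everything down.
 */
#if 0
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/mroute.h>

static int mroute_start(void)
{
	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
	int one = 1;
	struct vifctl vc;
	struct mfcctl mc;

	/* become the routing daemon; only one socket may do this,
	   a second MRT_INIT fails with EADDRINUSE */
	if (s < 0 || setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one)) < 0)
		return -1;

	memset(&vc, 0, sizeof(vc));
	vc.vifc_vifi = 0;				/* hypothetical vif index */
	vc.vifc_threshold = 1;				/* minimum TTL to forward */
	vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
	if (setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc)) < 0)
		return -1;

	memset(&mc, 0, sizeof(mc));
	mc.mfcc_origin.s_addr = inet_addr("192.0.2.42");	/* S */
	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.2.3");	/* G */
	mc.mfcc_parent = 0;				/* expected input vif */
	mc.mfcc_ttls[1] = 1;				/* forward to vif 1 */
	if (setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc)) < 0)
		return -1;
	return s;
}
#endif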

/*
 *      Getsockopt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
{
        int olr;
        int val;

        if(optname!=MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
           optname!=MRT_PIM &&
#endif
           optname!=MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        olr = min_t(unsigned int, olr, sizeof(int));
        if (olr < 0)
                return -EINVAL;

        if(put_user(olr,optlen))
                return -EFAULT;
        if(optname==MRT_VERSION)
                val=0x0305;
#ifdef CONFIG_IP_PIMSM
        else if(optname==MRT_PIM)
                val=mroute_do_pim;
#endif
        else
                val=mroute_do_assert;
        if(copy_to_user(optval,&val,olr))
                return -EFAULT;
        return 0;
}

/*
 *      The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;

        switch(cmd)
        {
                case SIOCGETVIFCNT:
                        if (copy_from_user(&vr,arg,sizeof(vr)))
                                return -EFAULT;
                        if(vr.vifi>=maxvif)
                                return -EINVAL;
                        read_lock(&mrt_lock);
                        vif=&vif_table[vr.vifi];
                        if(VIF_EXISTS(vr.vifi)) {
                                vr.icount=vif->pkt_in;
                                vr.ocount=vif->pkt_out;
                                vr.ibytes=vif->bytes_in;
                                vr.obytes=vif->bytes_out;
                                read_unlock(&mrt_lock);

                                if (copy_to_user(arg,&vr,sizeof(vr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                case SIOCGETSGCNT:
                        if (copy_from_user(&sr,arg,sizeof(sr)))
                                return -EFAULT;

                        read_lock(&mrt_lock);
                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
                        if (c) {
                                sr.pktcnt = c->mfc_un.res.pkt;
                                sr.bytecnt = c->mfc_un.res.bytes;
                                sr.wrong_if = c->mfc_un.res.wrong_if;
                                read_unlock(&mrt_lock);

                                if (copy_to_user(arg,&sr,sizeof(sr)))
                                        return -EFAULT;
                                return 0;
                        }
                        read_unlock(&mrt_lock);
                        return -EADDRNOTAVAIL;
                default:
                        return -ENOIOCTLCMD;
        }
}
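
/*
 * Reading those counters back from userspace is symmetric (illustrative
 * sketch; the vif index and addresses are made up):
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <linux/mroute.h>

static void dump_counters(int mrt_sock)
{
	struct sioc_vif_req vr;
	struct sioc_sg_req sr;

	memset(&vr, 0, sizeof(vr));
	vr.vifi = 0;					/* hypothetical vif */
	if (ioctl(mrt_sock, SIOCGETVIFCNT, &vr) == 0)
		printf("vif0: %lu pkts in, %lu pkts out\n", vr.icount, vr.ocount);

	memset(&sr, 0, sizeof(sr));
	sr.src.s_addr = inet_addr("192.0.2.42");	/* S */
	sr.grp.s_addr = inet_addr("239.1.2.3");		/* G */
	if (ioctl(mrt_sock, SIOCGETSGCNT, &sr) == 0)
		printf("(S,G): %lu pkts, %lu bytes\n", sr.pktcnt, sr.bytecnt);
}
#endif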


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct vif_device *v;
        int ct;
        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;
        v=&vif_table[0];
        for(ct=0;ct<maxvif;ct++,v++) {
                if (v->dev==ptr)
                        vif_delete(ct);
        }
        return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier={
        .notifier_call = ipmr_device_event,
};

/*
 *      Encapsulate a packet by attaching a valid IPIP header to it.
 *      This avoids tunnel drivers and other mess and gives us the speed so
 *      important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));

        iph->version    =       4;
        iph->tos        =       skb->nh.iph->tos;
        iph->ttl        =       skb->nh.iph->ttl;
        iph->frag_off   =       0;
        iph->daddr      =       daddr;
        iph->saddr      =       saddr;
        iph->protocol   =       IPPROTO_IPIP;
        iph->ihl        =       5;
        iph->tot_len    =       htons(skb->len);
        ip_select_ident(iph, skb->dst, NULL);
        ip_send_check(iph);

        skb->h.ipiph = skb->nh.iph;
        skb->nh.iph = iph;
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        nf_reset(skb);
}
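
/*
 * Illustration: ip_encap() turns [inner IP | payload] on the wire into
 * [outer IP, protocol 4 (IPIP) | inner IP | payload]; tos and ttl are
 * copied from the inner header and tot_len grows by the 20-byte outer
 * header that skb_push() prepended.
 */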

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options * opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}

/*
 *      Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct iphdr *iph = skb->nh.iph;
        struct vif_device *vif = &vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out+=skb->len;
                ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
                ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
                kfree_skb(skb);
                return;
        }
#endif

        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(&rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow us to send ICMP here, so such packets will
                   simply disappear into a black hole.
                 */

                IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out+=skb->len;

        dst_release(skb->dst);
        skb->dst = &rt->u.dst;
        iph = skb->nh.iph;
        ip_decrease_ttl(iph);

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
                ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches that a DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but also after forwarding on all output
         * interfaces. That is, if the mrouter runs a multicasting program, the
         * program should receive packets regardless of which interface it
         * joined on. If we did not do this, the program would have to join on
         * all interfaces. On the other hand, a multihomed host (or router, but
         * not mrouter) cannot join on more than one interface: that would
         * result in receiving multiple copies of each packet.
         */
        NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}

static int ipmr_find_vif(struct net_device *dev)
{
        int ct;
        for (ct=maxvif-1; ct>=0; ct--) {
                if (vif_table[ct].dev == dev)
                        break;
        }
        return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (((struct rtable*)skb->dst)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until the routing daemons are
                           fixed is not to redistribute a packet if it was
                           sent through the wrong interface. This means that
                           multicast applications WILL NOT work for (S,G)
                           entries whose default multicast route points to a
                           wrong oif. In any case, it is not a good idea to
                           run multicasting applications on a router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                if (true_vifi >= 0 && mroute_do_assert &&
                    /* pimsm uses asserts when switching from RPT to SPT,
                       so we cannot check that the packet arrived on an oif.
                       This is bad, but otherwise we would need to move a
                       pretty large chunk of pimd into the kernel. Ough... --ANK
                     */
                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        vif_table[vif].pkt_in++;
        vif_table[vif].bytes_in+=skb->len;

        /*
         *      Forward the frame
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend=ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}


/*
 *      Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

        /* A packet looped back after forwarding must not be
           forwarded a second time, but it can still be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations,
                               such as Cisco IOS <= 11.2(8)) do not put the
                               router alert option in IGMP packets destined
                               to routable groups. This is very bad, because
                               it means that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (mroute_socket) {
                                    nf_reset(skb);
                                    raw_rcv(mroute_socket, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

        /*
         *      No usable cache entry
         */
        if (cache==NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff * skb)
{
        struct igmphdr *pim;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
                goto drop;

        pim = (struct igmphdr*)skb->h.raw;

        if (!mroute_do_pim ||
            skb->len < sizeof(*pim) + sizeof(*encap) ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
                goto drop;

        encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
        /*
           Check that:
           a. packet is really destined to a multicast group
           b. packet is not a NULL-REGISTER
           c. packet is not truncated
         */
        if (!MULTICAST(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
                goto drop;

        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                goto drop;

        skb->mac.raw = skb->nh.raw;
        skb_pull(skb, (u8*)encap - skb->data);
        skb->nh.iph = (struct iphdr *)skb->data;
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        skb->dst = NULL;
        ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
        ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
 drop:
        kfree_skb(skb);
        return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff * skb)
{
        struct pimreghdr *pim;
        struct iphdr   *encap;
        struct net_device  *reg_dev = NULL;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
                goto drop;

        pim = (struct pimreghdr*)skb->h.raw;
        if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
            (pim->flags&PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
                goto drop;

        /* check if the inner packet is destined to mcast group */
        encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
        if (!MULTICAST(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
                goto drop;

        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = vif_table[reg_vif_num].dev;
        if (reg_dev)
                dev_hold(reg_dev);
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                goto drop;

        skb->mac.raw = skb->nh.raw;
        skb_pull(skb, (u8*)encap - skb->data);
        skb->nh.iph = (struct iphdr *)skb->data;
        skb->dev = reg_dev;
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = 0;
        skb->pkt_type = PACKET_HOST;
        dst_release(skb->dst);
        ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
        ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
        skb->dst = NULL;
        nf_reset(skb);
        netif_rx(skb);
        dev_put(reg_dev);
        return 0;
 drop:
        kfree_skb(skb);
        return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
        int ct;
        struct rtnexthop *nhp;
        struct net_device *dev = vif_table[c->mfc_parent].dev;
        u8 *b = skb->tail;
        struct rtattr *mp_head;

        if (dev)
                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
                if (c->mfc_un.res.ttls[ct] < 255) {
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                                goto rtattr_failure;
                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
                        nhp->rtnh_flags = 0;
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
                        nhp->rtnh_len = sizeof(*nhp);
                }
        }
        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len = skb->tail - (u8*)mp_head;
        rtm->rtm_type = RTN_MULTICAST;
        return 1;

rtattr_failure:
        skb_trim(skb, b - skb->data);
        return -EMSGSIZE;
}
1571
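/*
 *      Resolve a multicast route on behalf of an rtnetlink request.
 *      If no cache entry exists yet (and the caller can wait), a clone
 *      of the packet carrying a minimal fake IP header (version 0
 *      marks it as fake) is queued as an unresolved entry, which
 *      triggers an upcall to the user-space multicast routing daemon.
 */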
1572int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1573{
1574        int err;
1575        struct mfc_cache *cache;
1576        struct rtable *rt = (struct rtable*)skb->dst;
1577
1578        read_lock(&mrt_lock);
1579        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1580
1581        if (cache==NULL) {
1582                struct sk_buff *skb2;
1583                struct net_device *dev;
1584                int vif;
1585
1586                if (nowait) {
1587                        read_unlock(&mrt_lock);
1588                        return -EAGAIN;
1589                }
1590
1591                dev = skb->dev;
1592                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1593                        read_unlock(&mrt_lock);
1594                        return -ENODEV;
1595                }
1596                skb2 = skb_clone(skb, GFP_ATOMIC);
1597                if (!skb2) {
1598                        read_unlock(&mrt_lock);
1599                        return -ENOMEM;
1600                }
1601
1602                skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
1603                skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
1604                skb2->nh.iph->saddr = rt->rt_src;
1605                skb2->nh.iph->daddr = rt->rt_dst;
1606                skb2->nh.iph->version = 0;
1607                err = ipmr_cache_unresolved(vif, skb2);
1608                read_unlock(&mrt_lock);
1609                return err;
1610        }
1611
1612        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1613                cache->mfc_flags |= MFC_NOTIFY;
1614        err = ipmr_fill_mroute(skb, cache, rtm);
1615        read_unlock(&mrt_lock);
1616        return err;
1617}
1618
1619#ifdef CONFIG_PROC_FS   
1620/*
1621 *      The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1622 */
1623struct ipmr_vif_iter {
1624        int ct;
1625};
1626
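/*
 *      Map a linear seq_file position onto the vif table, skipping
 *      slots that are not in use.
 */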
1627static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1628                                           loff_t pos)
1629{
1630        for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1631                if (!VIF_EXISTS(iter->ct))
1632                        continue;
1633                if (pos-- == 0) 
1634                        return &vif_table[iter->ct];
1635        }
1636        return NULL;
1637}
1638
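/*
 *      mrt_lock is taken in ->start() and dropped in ->stop(), so the
 *      vif table cannot change underneath a dump in progress.
 */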
1639static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1640{
1641        read_lock(&mrt_lock);
1642        return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 
1643                : SEQ_START_TOKEN;
1644}
1645
1646static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1647{
1648        struct ipmr_vif_iter *iter = seq->private;
1649
1650        ++*pos;
1651        if (v == SEQ_START_TOKEN)
1652                return ipmr_vif_seq_idx(iter, 0);
1653        
1654        while (++iter->ct < maxvif) {
1655                if (!VIF_EXISTS(iter->ct))
1656                        continue;
1657                return &vif_table[iter->ct];
1658        }
1659        return NULL;
1660}
1661
1662static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1663{
1664        read_unlock(&mrt_lock);
1665}
1666
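/*
 *      Emit one line of /proc/net/ip_mr_vif: the header row for
 *      SEQ_START_TOKEN, otherwise the counters and flags of one vif.
 */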
1667static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1668{
1669        if (v == SEQ_START_TOKEN) {
1670                seq_puts(seq, 
1671                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1672        } else {
1673                const struct vif_device *vif = v;
1674                const char *name =  vif->dev ? vif->dev->name : "none";
1675
1676                seq_printf(seq,
1677                           "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1678                           vif - vif_table,
1679                           name, vif->bytes_in, vif->pkt_in, 
1680                           vif->bytes_out, vif->pkt_out,
1681                           vif->flags, vif->local, vif->remote);
1682        }
1683        return 0;
1684}
1685
1686static struct seq_operations ipmr_vif_seq_ops = {
1687        .start = ipmr_vif_seq_start,
1688        .next  = ipmr_vif_seq_next,
1689        .stop  = ipmr_vif_seq_stop,
1690        .show  = ipmr_vif_seq_show,
1691};
1692
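/*
 *      Open /proc/net/ip_mr_vif: allocate a private iterator and hang
 *      it off the seq_file; seq_release_private() frees it on close.
 */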
1693static int ipmr_vif_open(struct inode *inode, struct file *file)
1694{
1695        struct seq_file *seq;
1696        int rc = -ENOMEM;
1697        struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1698       
1699        if (!s)
1700                goto out;
1701
1702        rc = seq_open(file, &ipmr_vif_seq_ops);
1703        if (rc)
1704                goto out_kfree;
1705
1706        s->ct = 0;
1707        seq = file->private_data;
1708        seq->private = s;
1709out:
1710        return rc;
1711out_kfree:
1712        kfree(s);
1713        goto out;
1715}
1716
1717static struct file_operations ipmr_vif_fops = {
1718        .owner   = THIS_MODULE,
1719        .open    = ipmr_vif_open,
1720        .read    = seq_read,
1721        .llseek  = seq_lseek,
1722        .release = seq_release_private,
1723};
1724
1725struct ipmr_mfc_iter {
1726        struct mfc_cache **cache;
1727        int ct;
1728};
1729
1730
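/*
 *      The MFC dump walks two structures with different locking rules:
 *      the resolved hash table under mrt_lock, then the unresolved
 *      queue under mfc_unres_lock.  it->cache records which of the two
 *      we are currently in, so that ->next() can hand over from one
 *      lock to the other and ->stop() drops the right one.
 */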
1731static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1732{
1733        struct mfc_cache *mfc;
1734
1735        it->cache = mfc_cache_array;
1736        read_lock(&mrt_lock);
1737        for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 
1738                for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1739                        if (pos-- == 0) 
1740                                return mfc;
1741        read_unlock(&mrt_lock);
1742
1743        it->cache = &mfc_unres_queue;
1744        spin_lock_bh(&mfc_unres_lock);
1745        for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1746                if (pos-- == 0)
1747                        return mfc;
1748        spin_unlock_bh(&mfc_unres_lock);
1749
1750        it->cache = NULL;
1751        return NULL;
1752}
1753
1754
1755static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1756{
1757        struct ipmr_mfc_iter *it = seq->private;
1758        it->cache = NULL;
1759        it->ct = 0;
1760        return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 
1761                : SEQ_START_TOKEN;
1762}
1763
1764static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1765{
1766        struct mfc_cache *mfc = v;
1767        struct ipmr_mfc_iter *it = seq->private;
1768
1769        ++*pos;
1770
1771        if (v == SEQ_START_TOKEN)
1772                return ipmr_mfc_seq_idx(seq->private, 0);
1773
1774        if (mfc->next)
1775                return mfc->next;
1776        
1777        if (it->cache == &mfc_unres_queue) 
1778                goto end_of_list;
1779
1780        BUG_ON(it->cache != mfc_cache_array);
1781
1782        while (++it->ct < MFC_LINES) {
1783                mfc = mfc_cache_array[it->ct];
1784                if (mfc)
1785                        return mfc;
1786        }
1787
1788        /* exhausted cache_array, show unresolved */
1789        read_unlock(&mrt_lock);
1790        it->cache = &mfc_unres_queue;
1791        it->ct = 0;
1792                
1793        spin_lock_bh(&mfc_unres_lock);
1794        mfc = mfc_unres_queue;
1795        if (mfc) 
1796                return mfc;
1797
1798 end_of_list:
1799        spin_unlock_bh(&mfc_unres_lock);
1800        it->cache = NULL;
1801
1802        return NULL;
1803}
1804
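/* Release whichever lock the iterator left held. */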
1805static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1806{
1807        struct ipmr_mfc_iter *it = seq->private;
1808
1809        if (it->cache == &mfc_unres_queue)
1810                spin_unlock_bh(&mfc_unres_lock);
1811        else if (it->cache == mfc_cache_array)
1812                read_unlock(&mrt_lock);
1813}
1814
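/*
 *      Emit one line of /proc/net/ip_mr_cache: group, origin, input
 *      vif and counters; resolved entries are followed by their
 *      "oif:TTL" pairs.
 */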
1815static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1816{
1817        int n;
1818
1819        if (v == SEQ_START_TOKEN) {
1820                seq_puts(seq, 
1821                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1822        } else {
1823                const struct mfc_cache *mfc = v;
1824                const struct ipmr_mfc_iter *it = seq->private;
1825                
1826                seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1827                           (unsigned long) mfc->mfc_mcastgrp,
1828                           (unsigned long) mfc->mfc_origin,
1829                           mfc->mfc_parent,
1830                           mfc->mfc_un.res.pkt,
1831                           mfc->mfc_un.res.bytes,
1832                           mfc->mfc_un.res.wrong_if);
1833
1834                if (it->cache != &mfc_unres_queue) {
1835                        for (n = mfc->mfc_un.res.minvif;
1836                             n < mfc->mfc_un.res.maxvif; n++) {
1837                                if (VIF_EXISTS(n) &&
1838                                    mfc->mfc_un.res.ttls[n] < 255)
1839                                        seq_printf(seq,
1840                                                   " %2d:%-3d",
1841                                                   n, mfc->mfc_un.res.ttls[n]);
1842                        }
1843                }
1844                seq_putc(seq, '\n');
1845        }
1846        return 0;
1847}
1848
1849static struct seq_operations ipmr_mfc_seq_ops = {
1850        .start = ipmr_mfc_seq_start,
1851        .next  = ipmr_mfc_seq_next,
1852        .stop  = ipmr_mfc_seq_stop,
1853        .show  = ipmr_mfc_seq_show,
1854};
1855
1856static int ipmr_mfc_open(struct inode *inode, struct file *file)
1857{
1858        struct seq_file *seq;
1859        int rc = -ENOMEM;
1860        struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1861       
1862        if (!s)
1863                goto out;
1864
1865        rc = seq_open(file, &ipmr_mfc_seq_ops);
1866        if (rc)
1867                goto out_kfree;
1868
1869        seq = file->private_data;
1870        seq->private = s;
1871out:
1872        return rc;
1873out_kfree:
1874        kfree(s);
1875        goto out;
1877}
1878
1879static struct file_operations ipmr_mfc_fops = {
1880        .owner   = THIS_MODULE,
1881        .open    = ipmr_mfc_open,
1882        .read    = seq_read,
1883        .llseek  = seq_lseek,
1884        .release = seq_release_private,
1885};
1886#endif  
1887
1888#ifdef CONFIG_IP_PIMSM_V2
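/*
 *      Handler table for IPPROTO_PIM.  Its registration with
 *      inet_add_protocol() is not shown in this excerpt; it is
 *      expected to happen when PIM support is switched on via the
 *      MRT_PIM socket option.
 */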
1889static struct net_protocol pim_protocol = {
1890        .handler        =       pim_rcv,
1891};
1892#endif
1893
1894
1895/*
1896 *      Setup for IP multicast routing: create the MFC slab cache,
 *      initialise the expiry timer for unresolved entries, register
 *      the netdevice notifier and, under CONFIG_PROC_FS, create the
 *      /proc/net entries.
1897 */
1898 
1899void __init ip_mr_init(void)
1900{
1901        mrt_cachep = kmem_cache_create("ip_mrt_cache",
1902                                       sizeof(struct mfc_cache),
1903                                       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1904                                       NULL, NULL);
1905        init_timer(&ipmr_expire_timer);
1906        ipmr_expire_timer.function=ipmr_expire_process;
1907        register_netdevice_notifier(&ip_mr_notifier);
1908#ifdef CONFIG_PROC_FS   
1909        proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1910        proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1911#endif  
1912}
1913