linux-old/net/ipv4/ipmr.c
<<
>>
Prefs
   1/*
   2 *      IP multicast routing support for mrouted 3.6/3.8
   3 *
   4 *              (c) 1995 Alan Cox, <alan@redhat.com>
   5 *        Linux Consultancy and Custom Driver Development
   6 *
   7 *      This program is free software; you can redistribute it and/or
   8 *      modify it under the terms of the GNU General Public License
   9 *      as published by the Free Software Foundation; either version
  10 *      2 of the License, or (at your option) any later version.
  11 *
  12 *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
  13 *
  14 *      Fixes:
  15 *      Michael Chastain        :       Incorrect size of copying.
  16 *      Alan Cox                :       Added the cache manager code
  17 *      Alan Cox                :       Fixed the clone/copy bug and device race.
  18 *      Mike McLagan            :       Routing by source
  19 *      Malcolm Beattie         :       Buffer handling fixes.
  20 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  21 *      SVR Anand               :       Fixed several multicast bugs and problems.
  22 *      Alexey Kuznetsov        :       Status, optimisations and more.
  23 *      Brad Parker             :       Better behaviour on mrouted upcall
  24 *                                      overflow.
  25 *      Carlos Picoto           :       PIMv1 Support
  26 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
   27 *                                      Relax this requirement to work with older peers.
  28 *
  29 */
  30
  31#include <linux/config.h>
  32#include <asm/system.h>
  33#include <asm/uaccess.h>
  34#include <linux/types.h>
  35#include <linux/sched.h>
  36#include <linux/errno.h>
  37#include <linux/timer.h>
  38#include <linux/mm.h>
  39#include <linux/kernel.h>
  40#include <linux/fcntl.h>
  41#include <linux/stat.h>
  42#include <linux/socket.h>
  43#include <linux/in.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/inetdevice.h>
  47#include <linux/igmp.h>
  48#include <linux/proc_fs.h>
  49#include <linux/mroute.h>
  50#include <linux/init.h>
  51#include <net/ip.h>
  52#include <net/protocol.h>
  53#include <linux/skbuff.h>
  54#include <net/sock.h>
  55#include <net/icmp.h>
  56#include <net/udp.h>
  57#include <net/raw.h>
  58#include <linux/notifier.h>
  59#include <linux/if_arp.h>
  60#include <linux/netfilter_ipv4.h>
  61#include <net/ipip.h>
  62#include <net/checksum.h>
  63
  64#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  65#define CONFIG_IP_PIMSM 1
  66#endif
  67
   68static struct sock *mroute_socket;                      /* Socket registered by MRT_INIT, NULL if none */
   69
   70
   71/* Big rwlock protecting the vif table, the resolved MFC cache and
   72   the mroute_socket pointer. Writers in this file also run under rtnl_lock.
   73 */
   74
   75static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
   76
   77/*
   78 *      Multicast router control variables
   79 */
   80
   81static struct vif_device vif_table[MAXVIFS];            /* Virtual interfaces, indexed by vif number */
   82static int maxvif;                                      /* Highest vif index in use, plus one */
   83
   84#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
   85
   86int mroute_do_assert;                                   /* Set by MRT_ASSERT and MRT_PIM */
   87int mroute_do_pim;                                      /* Set by MRT_PIM */
   88
   89static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Resolved forwarding cache, hashed by MFC_HASH */
   90
   91static struct mfc_cache *mfc_unres_queue;               /* Singly linked queue of unresolved entries */
   92atomic_t cache_resolve_queue_len;                       /* Number of entries on mfc_unres_queue */
   93
   94/* Dedicated spinlock for the queue of unresolved entries */
   95static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
   96
   97/* We return to Alan's original scheme. The hash table of resolved
   98   entries is changed only in process context and is protected
   99   by the weak lock mrt_lock. The queue of unresolved entries is protected
  100   by the strong spinlock mfc_unres_lock.
  101
  102   With this scheme the data path takes no exclusive locks at all.
  103 */
  104
  105kmem_cache_t *mrt_cachep;                               /* Slab cache that struct mfc_cache entries come from */
  106
  107static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
  108static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
  109static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
  110
  111extern struct inet_protocol pim_protocol;
  112
  113static struct timer_list ipmr_expire_timer;             /* Runs ipmr_expire_process for the unresolved queue */
 114
 115/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 116
 117static
 118struct net_device *ipmr_new_tunnel(struct vifctl *v)
 119{
 120        struct net_device  *dev;
 121
 122        dev = __dev_get_by_name("tunl0");
 123
 124        if (dev) {
 125                int err;
 126                struct ifreq ifr;
 127                mm_segment_t    oldfs;
 128                struct ip_tunnel_parm p;
 129                struct in_device  *in_dev;
 130
 131                memset(&p, 0, sizeof(p));
 132                p.iph.daddr = v->vifc_rmt_addr.s_addr;
 133                p.iph.saddr = v->vifc_lcl_addr.s_addr;
 134                p.iph.version = 4;
 135                p.iph.ihl = 5;
 136                p.iph.protocol = IPPROTO_IPIP;
 137                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 138                ifr.ifr_ifru.ifru_data = (void*)&p;
 139
 140                oldfs = get_fs(); set_fs(KERNEL_DS);
 141                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 142                set_fs(oldfs);
 143
 144                dev = NULL;
 145
 146                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
 147                        dev->flags |= IFF_MULTICAST;
 148
 149                        in_dev = __in_dev_get(dev);
 150                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
 151                                goto failure;
 152                        in_dev->cnf.rp_filter = 0;
 153
 154                        if (dev_open(dev))
 155                                goto failure;
 156                }
 157        }
 158        return dev;
 159
 160failure:
 161        unregister_netdevice(dev);
 162        return NULL;
 163}
 164
 165#ifdef CONFIG_IP_PIMSM
 166
 167static int reg_vif_num = -1;
 168
 169static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 170{
 171        read_lock(&mrt_lock);
 172        ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
 173        ((struct net_device_stats*)dev->priv)->tx_packets++;
 174        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 175        read_unlock(&mrt_lock);
 176        kfree_skb(skb);
 177        return 0;
 178}
 179
 180static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
 181{
 182        return (struct net_device_stats*)dev->priv;
 183}
 184
 185static
 186struct net_device *ipmr_reg_vif(struct vifctl *v)
 187{
 188        struct net_device  *dev;
 189        struct in_device *in_dev;
 190        int size;
 191
 192        size = sizeof(*dev) + sizeof(struct net_device_stats);
 193        dev = kmalloc(size, GFP_KERNEL);
 194        if (!dev)
 195                return NULL;
 196
 197        memset(dev, 0, size);
 198
 199        dev->priv = dev + 1;
 200
 201        strcpy(dev->name, "pimreg");
 202
 203        dev->type               = ARPHRD_PIMREG;
 204        dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
 205        dev->flags              = IFF_NOARP;
 206        dev->hard_start_xmit    = reg_vif_xmit;
 207        dev->get_stats          = reg_vif_get_stats;
 208        dev->features           |= NETIF_F_DYNALLOC;
 209
 210        if (register_netdevice(dev)) {
 211                kfree(dev);
 212                return NULL;
 213        }
 214        dev->iflink = 0;
 215
 216        if ((in_dev = inetdev_init(dev)) == NULL)
 217                goto failure;
 218
 219        in_dev->cnf.rp_filter = 0;
 220
 221        if (dev_open(dev))
 222                goto failure;
 223
 224        return dev;
 225
 226failure:
 227        unregister_netdevice(dev);
 228        return NULL;
 229}
 230#endif
 231
 232/*
 233 *      Delete a VIF entry
 234 */
 235 
 236static int vif_delete(int vifi)
 237{
 238        struct vif_device *v;
 239        struct net_device *dev;
 240        struct in_device *in_dev;
 241
 242        if (vifi < 0 || vifi >= maxvif)
 243                return -EADDRNOTAVAIL;
 244
 245        v = &vif_table[vifi];
 246
 247        write_lock_bh(&mrt_lock);
 248        dev = v->dev;
 249        v->dev = NULL;
 250
 251        if (!dev) {
 252                write_unlock_bh(&mrt_lock);
 253                return -EADDRNOTAVAIL;
 254        }
 255
 256#ifdef CONFIG_IP_PIMSM
 257        if (vifi == reg_vif_num)
 258                reg_vif_num = -1;
 259#endif
 260
 261        if (vifi+1 == maxvif) {
 262                int tmp;
 263                for (tmp=vifi-1; tmp>=0; tmp--) {
 264                        if (VIF_EXISTS(tmp))
 265                                break;
 266                }
 267                maxvif = tmp+1;
 268        }
 269
 270        write_unlock_bh(&mrt_lock);
 271
 272        dev_set_allmulti(dev, -1);
 273
 274        if ((in_dev = __in_dev_get(dev)) != NULL) {
 275                in_dev->cnf.mc_forwarding--;
 276                ip_rt_multicast_event(in_dev);
 277        }
 278
 279        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 280                unregister_netdevice(dev);
 281
 282        dev_put(dev);
 283        return 0;
 284}
 285
 286/* Destroy an unresolved cache entry, killing queued skbs
 287   and reporting error to netlink readers.
 288 */
 289
 290static void ipmr_destroy_unres(struct mfc_cache *c)
 291{
 292        struct sk_buff *skb;
 293
 294        atomic_dec(&cache_resolve_queue_len);
 295
 296        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
 297                if (skb->nh.iph->version == 0) {
 298                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 299                        nlh->nlmsg_type = NLMSG_ERROR;
 300                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 301                        skb_trim(skb, nlh->nlmsg_len);
 302                        ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
 303                        netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
 304                } else
 305                        kfree_skb(skb);
 306        }
 307
 308        kmem_cache_free(mrt_cachep, c);
 309}
 310
 311
 312/* Single timer process for all the unresolved queue. */
 313
 314void ipmr_expire_process(unsigned long dummy)
 315{
 316        unsigned long now;
 317        unsigned long expires;
 318        struct mfc_cache *c, **cp;
 319
 320        if (!spin_trylock(&mfc_unres_lock)) {
 321                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 322                return;
 323        }
 324
 325        if (atomic_read(&cache_resolve_queue_len) == 0)
 326                goto out;
 327
 328        now = jiffies;
 329        expires = 10*HZ;
 330        cp = &mfc_unres_queue;
 331
 332        while ((c=*cp) != NULL) {
 333                long interval = c->mfc_un.unres.expires - now;
 334
 335                if (interval > 0) {
 336                        if (interval < expires)
 337                                expires = interval;
 338                        cp = &c->next;
 339                        continue;
 340                }
 341
 342                *cp = c->next;
 343
 344                ipmr_destroy_unres(c);
 345        }
 346
 347        if (atomic_read(&cache_resolve_queue_len))
 348                mod_timer(&ipmr_expire_timer, jiffies + expires);
 349
 350out:
 351        spin_unlock(&mfc_unres_lock);
 352}
 353
 354/* Fill oifs list. It is called under write locked mrt_lock. */
 355
 356static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
 357{
 358        int vifi;
 359
 360        cache->mfc_un.res.minvif = MAXVIFS;
 361        cache->mfc_un.res.maxvif = 0;
 362        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 363
 364        for (vifi=0; vifi<maxvif; vifi++) {
 365                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
 366                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 367                        if (cache->mfc_un.res.minvif > vifi)
 368                                cache->mfc_un.res.minvif = vifi;
 369                        if (cache->mfc_un.res.maxvif <= vifi)
 370                                cache->mfc_un.res.maxvif = vifi + 1;
 371                }
 372        }
 373}
 374
 375static int vif_add(struct vifctl *vifc, int mrtsock)
 376{
 377        int vifi = vifc->vifc_vifi;
 378        struct vif_device *v = &vif_table[vifi];
 379        struct net_device *dev;
 380        struct in_device *in_dev;
 381
 382        /* Is vif busy ? */
 383        if (VIF_EXISTS(vifi))
 384                return -EADDRINUSE;
 385
 386        switch (vifc->vifc_flags) {
 387#ifdef CONFIG_IP_PIMSM
 388        case VIFF_REGISTER:
 389                /*
 390                 * Special Purpose VIF in PIM
 391                 * All the packets will be sent to the daemon
 392                 */
 393                if (reg_vif_num >= 0)
 394                        return -EADDRINUSE;
 395                dev = ipmr_reg_vif(vifc);
 396                if (!dev)
 397                        return -ENOBUFS;
 398                break;
 399#endif
 400        case VIFF_TUNNEL:       
 401                dev = ipmr_new_tunnel(vifc);
 402                if (!dev)
 403                        return -ENOBUFS;
 404                break;
 405        case 0:
 406                dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
 407                if (!dev)
 408                        return -EADDRNOTAVAIL;
 409                __dev_put(dev);
 410                break;
 411        default:
 412                return -EINVAL;
 413        }
 414
 415        if ((in_dev = __in_dev_get(dev)) == NULL)
 416                return -EADDRNOTAVAIL;
 417        in_dev->cnf.mc_forwarding++;
 418        dev_set_allmulti(dev, +1);
 419        ip_rt_multicast_event(in_dev);
 420
 421        /*
 422         *      Fill in the VIF structures
 423         */
 424        v->rate_limit=vifc->vifc_rate_limit;
 425        v->local=vifc->vifc_lcl_addr.s_addr;
 426        v->remote=vifc->vifc_rmt_addr.s_addr;
 427        v->flags=vifc->vifc_flags;
 428        if (!mrtsock)
 429                v->flags |= VIFF_STATIC;
 430        v->threshold=vifc->vifc_threshold;
 431        v->bytes_in = 0;
 432        v->bytes_out = 0;
 433        v->pkt_in = 0;
 434        v->pkt_out = 0;
 435        v->link = dev->ifindex;
 436        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 437                v->link = dev->iflink;
 438
 439        /* And finish update writing critical data */
 440        write_lock_bh(&mrt_lock);
 441        dev_hold(dev);
 442        v->dev=dev;
 443#ifdef CONFIG_IP_PIMSM
 444        if (v->flags&VIFF_REGISTER)
 445                reg_vif_num = vifi;
 446#endif
 447        if (vifi+1 > maxvif)
 448                maxvif = vifi+1;
 449        write_unlock_bh(&mrt_lock);
 450        return 0;
 451}
 452
 453static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
 454{
 455        int line=MFC_HASH(mcastgrp,origin);
 456        struct mfc_cache *c;
 457
 458        for (c=mfc_cache_array[line]; c; c = c->next) {
 459                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 460                        break;
 461        }
 462        return c;
 463}
 464
 465/*
 466 *      Allocate a multicast cache entry
 467 */
 468static struct mfc_cache *ipmr_cache_alloc(void)
 469{
 470        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
 471        if(c==NULL)
 472                return NULL;
 473        memset(c, 0, sizeof(*c));
 474        c->mfc_un.res.minvif = MAXVIFS;
 475        return c;
 476}
 477
 478static struct mfc_cache *ipmr_cache_alloc_unres(void)
 479{
 480        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
 481        if(c==NULL)
 482                return NULL;
 483        memset(c, 0, sizeof(*c));
 484        skb_queue_head_init(&c->mfc_un.unres.unresolved);
 485        c->mfc_un.unres.expires = jiffies + 10*HZ;
 486        return c;
 487}
 488
 489/*
 490 *      A cache entry has gone into a resolved state from queued
 491 */
 492 
 493static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 494{
 495        struct sk_buff *skb;
 496
 497        /*
 498         *      Play the pending entries through our router
 499         */
 500
 501        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 502                if (skb->nh.iph->version == 0) {
 503                        int err;
 504                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 505
 506                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 507                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
 508                        } else {
 509                                nlh->nlmsg_type = NLMSG_ERROR;
 510                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 511                                skb_trim(skb, nlh->nlmsg_len);
 512                                ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 513                        }
 514                        err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
 515                } else
 516                        ip_mr_forward(skb, c, 0);
 517        }
 518}
 519
 520/*
 521 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 522 *      expects the following bizarre scheme.
 523 *
 524 *      Called under mrt_lock.
 525 */
 526 
 527static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 528{
 529        struct sk_buff *skb;
 530        int ihl = pkt->nh.iph->ihl<<2;
 531        struct igmphdr *igmp;
 532        struct igmpmsg *msg;
 533        int ret;
 534
 535#ifdef CONFIG_IP_PIMSM
 536        if (assert == IGMPMSG_WHOLEPKT)
 537                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 538        else
 539#endif
 540                skb = alloc_skb(128, GFP_ATOMIC);
 541
 542        if(!skb)
 543                return -ENOBUFS;
 544
 545#ifdef CONFIG_IP_PIMSM
 546        if (assert == IGMPMSG_WHOLEPKT) {
 547                /* Ugly, but we have no choice with this interface.
 548                   Duplicate old header, fix ihl, length etc.
 549                   And all this only to mangle msg->im_msgtype and
 550                   to set msg->im_mbz to "mbz" :-)
 551                 */
 552                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
 553                skb->nh.raw = skb->h.raw = (u8*)msg;
 554                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
 555                msg->im_msgtype = IGMPMSG_WHOLEPKT;
 556                msg->im_mbz = 0;
 557                msg->im_vif = reg_vif_num;
 558                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
 559                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
 560        } else 
 561#endif
 562        {       
 563                
 564        /*
 565         *      Copy the IP header
 566         */
 567
 568        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
 569        memcpy(skb->data,pkt->data,ihl);
 570        skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
 571        msg = (struct igmpmsg*)skb->nh.iph;
 572        msg->im_vif = vifi;
 573        skb->dst = dst_clone(pkt->dst);
 574
 575        /*
 576         *      Add our header
 577         */
 578
 579        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
 580        igmp->type      =
 581        msg->im_msgtype = assert;
 582        igmp->code      =       0;
 583        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
 584        skb->h.raw = skb->nh.raw;
 585        }
 586
 587        if (mroute_socket == NULL) {
 588                kfree_skb(skb);
 589                return -EINVAL;
 590        }
 591
 592        /*
 593         *      Deliver to mrouted
 594         */
 595        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
 596                if (net_ratelimit())
 597                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 598                kfree_skb(skb);
 599        }
 600
 601        return ret;
 602}
 603
 604/*
 605 *      Queue a packet for resolution. It gets locked cache entry!
 606 */
 607 
 608static int
 609ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 610{
 611        int err;
 612        struct mfc_cache *c;
 613
 614        spin_lock_bh(&mfc_unres_lock);
 615        for (c=mfc_unres_queue; c; c=c->next) {
 616                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
 617                    c->mfc_origin == skb->nh.iph->saddr)
 618                        break;
 619        }
 620
 621        if (c == NULL) {
 622                /*
 623                 *      Create a new entry if allowable
 624                 */
 625
 626                if (atomic_read(&cache_resolve_queue_len)>=10 ||
 627                    (c=ipmr_cache_alloc_unres())==NULL) {
 628                        spin_unlock_bh(&mfc_unres_lock);
 629
 630                        kfree_skb(skb);
 631                        return -ENOBUFS;
 632                }
 633
 634                /*
 635                 *      Fill in the new cache entry
 636                 */
 637                c->mfc_parent=-1;
 638                c->mfc_origin=skb->nh.iph->saddr;
 639                c->mfc_mcastgrp=skb->nh.iph->daddr;
 640
 641                /*
 642                 *      Reflect first query at mrouted.
 643                 */
 644                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
 645                        /* If the report failed throw the cache entry 
 646                           out - Brad Parker
 647                         */
 648                        spin_unlock_bh(&mfc_unres_lock);
 649
 650                        kmem_cache_free(mrt_cachep, c);
 651                        kfree_skb(skb);
 652                        return err;
 653                }
 654
 655                atomic_inc(&cache_resolve_queue_len);
 656                c->next = mfc_unres_queue;
 657                mfc_unres_queue = c;
 658
 659                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 660        }
 661
 662        /*
 663         *      See if we can append the packet
 664         */
 665        if (c->mfc_un.unres.unresolved.qlen>3) {
 666                kfree_skb(skb);
 667                err = -ENOBUFS;
 668        } else {
 669                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
 670                err = 0;
 671        }
 672
 673        spin_unlock_bh(&mfc_unres_lock);
 674        return err;
 675}
 676
 677/*
 678 *      MFC cache manipulation by user space mroute daemon
 679 */
 680
 681int ipmr_mfc_delete(struct mfcctl *mfc)
 682{
 683        int line;
 684        struct mfc_cache *c, **cp;
 685
 686        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 687
 688        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 689                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 690                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 691                        write_lock_bh(&mrt_lock);
 692                        *cp = c->next;
 693                        write_unlock_bh(&mrt_lock);
 694
 695                        kmem_cache_free(mrt_cachep, c);
 696                        return 0;
 697                }
 698        }
 699        return -ENOENT;
 700}
 701
 702int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 703{
 704        int line;
 705        struct mfc_cache *uc, *c, **cp;
 706
 707        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 708
 709        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 710                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 711                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 712                        break;
 713        }
 714
 715        if (c != NULL) {
 716                write_lock_bh(&mrt_lock);
 717                c->mfc_parent = mfc->mfcc_parent;
 718                ipmr_update_threshoulds(c, mfc->mfcc_ttls);
 719                if (!mrtsock)
 720                        c->mfc_flags |= MFC_STATIC;
 721                write_unlock_bh(&mrt_lock);
 722                return 0;
 723        }
 724
 725        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
 726                return -EINVAL;
 727
 728        c=ipmr_cache_alloc();
 729        if (c==NULL)
 730                return -ENOMEM;
 731
 732        c->mfc_origin=mfc->mfcc_origin.s_addr;
 733        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
 734        c->mfc_parent=mfc->mfcc_parent;
 735        ipmr_update_threshoulds(c, mfc->mfcc_ttls);
 736        if (!mrtsock)
 737                c->mfc_flags |= MFC_STATIC;
 738
 739        write_lock_bh(&mrt_lock);
 740        c->next = mfc_cache_array[line];
 741        mfc_cache_array[line] = c;
 742        write_unlock_bh(&mrt_lock);
 743
 744        /*
 745         *      Check to see if we resolved a queued list. If so we
 746         *      need to send on the frames and tidy up.
 747         */
 748        spin_lock_bh(&mfc_unres_lock);
 749        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 750             cp = &uc->next) {
 751                if (uc->mfc_origin == c->mfc_origin &&
 752                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 753                        *cp = uc->next;
 754                        if (atomic_dec_and_test(&cache_resolve_queue_len))
 755                                del_timer(&ipmr_expire_timer);
 756                        break;
 757                }
 758        }
 759        spin_unlock_bh(&mfc_unres_lock);
 760
 761        if (uc) {
 762                ipmr_cache_resolve(uc, c);
 763                kmem_cache_free(mrt_cachep, uc);
 764        }
 765        return 0;
 766}
 767
 768/*
 769 *      Close the multicast socket, and clear the vif tables etc
 770 */
 771 
 772static void mroute_clean_tables(struct sock *sk)
 773{
 774        int i;
 775                
 776        /*
 777         *      Shut down all active vif entries
 778         */
 779        for(i=0; i<maxvif; i++) {
 780                if (!(vif_table[i].flags&VIFF_STATIC))
 781                        vif_delete(i);
 782        }
 783
 784        /*
 785         *      Wipe the cache
 786         */
 787        for (i=0;i<MFC_LINES;i++) {
 788                struct mfc_cache *c, **cp;
 789
 790                cp = &mfc_cache_array[i];
 791                while ((c = *cp) != NULL) {
 792                        if (c->mfc_flags&MFC_STATIC) {
 793                                cp = &c->next;
 794                                continue;
 795                        }
 796                        write_lock_bh(&mrt_lock);
 797                        *cp = c->next;
 798                        write_unlock_bh(&mrt_lock);
 799
 800                        kmem_cache_free(mrt_cachep, c);
 801                }
 802        }
 803
 804        if (atomic_read(&cache_resolve_queue_len) != 0) {
 805                struct mfc_cache *c;
 806
 807                spin_lock_bh(&mfc_unres_lock);
 808                while (mfc_unres_queue != NULL) {
 809                        c = mfc_unres_queue;
 810                        mfc_unres_queue = c->next;
 811                        spin_unlock_bh(&mfc_unres_lock);
 812
 813                        ipmr_destroy_unres(c);
 814
 815                        spin_lock_bh(&mfc_unres_lock);
 816                }
 817                spin_unlock_bh(&mfc_unres_lock);
 818        }
 819}
 820
 821static void mrtsock_destruct(struct sock *sk)
 822{
 823        rtnl_lock();
 824        if (sk == mroute_socket) {
 825                ipv4_devconf.mc_forwarding--;
 826
 827                write_lock_bh(&mrt_lock);
 828                mroute_socket=NULL;
 829                write_unlock_bh(&mrt_lock);
 830
 831                mroute_clean_tables(sk);
 832        }
 833        rtnl_unlock();
 834}
 835
 836/*
 837 *      Socket options and virtual interface manipulation. The whole
 838 *      virtual interface system is a complete heap, but unfortunately
 839 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 840 *      MOSPF/PIM router set up we can clean this up.
 841 */
 842 
 843int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
 844{
 845        int ret;
 846        struct vifctl vif;
 847        struct mfcctl mfc;
 848        
 849        if(optname!=MRT_INIT)
 850        {
 851                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
 852                        return -EACCES;
 853        }
 854
 855        switch(optname)
 856        {
 857                case MRT_INIT:
 858                        if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
 859                                return -EOPNOTSUPP;
 860                        if(optlen!=sizeof(int))
 861                                return -ENOPROTOOPT;
 862
 863                        rtnl_lock();
 864                        if (mroute_socket) {
 865                                rtnl_unlock();
 866                                return -EADDRINUSE;
 867                        }
 868
 869                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
 870                        if (ret == 0) {
 871                                write_lock_bh(&mrt_lock);
 872                                mroute_socket=sk;
 873                                write_unlock_bh(&mrt_lock);
 874
 875                                ipv4_devconf.mc_forwarding++;
 876                        }
 877                        rtnl_unlock();
 878                        return ret;
 879                case MRT_DONE:
 880                        if (sk!=mroute_socket)
 881                                return -EACCES;
 882                        return ip_ra_control(sk, 0, NULL);
 883                case MRT_ADD_VIF:
 884                case MRT_DEL_VIF:
 885                        if(optlen!=sizeof(vif))
 886                                return -EINVAL;
 887                        if (copy_from_user(&vif,optval,sizeof(vif)))
 888                                return -EFAULT; 
 889                        if(vif.vifc_vifi >= MAXVIFS)
 890                                return -ENFILE;
 891                        rtnl_lock();
 892                        if (optname==MRT_ADD_VIF) {
 893                                ret = vif_add(&vif, sk==mroute_socket);
 894                        } else {
 895                                ret = vif_delete(vif.vifc_vifi);
 896                        }
 897                        rtnl_unlock();
 898                        return ret;
 899
 900                /*
 901                 *      Manipulate the forwarding caches. These live
 902                 *      in a sort of kernel/user symbiosis.
 903                 */
 904                case MRT_ADD_MFC:
 905                case MRT_DEL_MFC:
 906                        if(optlen!=sizeof(mfc))
 907                                return -EINVAL;
 908                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
 909                                return -EFAULT;
 910                        rtnl_lock();
 911                        if (optname==MRT_DEL_MFC)
 912                                ret = ipmr_mfc_delete(&mfc);
 913                        else
 914                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
 915                        rtnl_unlock();
 916                        return ret;
 917                /*
 918                 *      Control PIM assert.
 919                 */
 920                case MRT_ASSERT:
 921                {
 922                        int v;
 923                        if(get_user(v,(int *)optval))
 924                                return -EFAULT;
 925                        mroute_do_assert=(v)?1:0;
 926                        return 0;
 927                }
 928#ifdef CONFIG_IP_PIMSM
 929                case MRT_PIM:
 930                {
 931                        int v;
 932                        if(get_user(v,(int *)optval))
 933                                return -EFAULT;
 934                        v = (v)?1:0;
 935                        rtnl_lock();
 936                        if (v != mroute_do_pim) {
 937                                mroute_do_pim = v;
 938                                mroute_do_assert = v;
 939#ifdef CONFIG_IP_PIMSM_V2
 940                                if (mroute_do_pim)
 941                                        inet_add_protocol(&pim_protocol);
 942                                else
 943                                        inet_del_protocol(&pim_protocol);
 944#endif
 945                        }
 946                        rtnl_unlock();
 947                        return 0;
 948                }
 949#endif
 950                /*
 951                 *      Spurious command, or MRT_VERSION which you cannot
 952                 *      set.
 953                 */
 954                default:
 955                        return -ENOPROTOOPT;
 956        }
 957}
 958
 959/*
 960 *      Getsock opt support for the multicast routing system.
 961 */
 962 
 963int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
 964{
 965        int olr;
 966        int val;
 967
 968        if(optname!=MRT_VERSION && 
 969#ifdef CONFIG_IP_PIMSM
 970           optname!=MRT_PIM &&
 971#endif
 972           optname!=MRT_ASSERT)
 973                return -ENOPROTOOPT;
 974
 975        if (get_user(olr, optlen))
 976                return -EFAULT;
 977
 978        olr = min_t(unsigned int, olr, sizeof(int));
 979        if (olr < 0)
 980                return -EINVAL;
 981                
 982        if(put_user(olr,optlen))
 983                return -EFAULT;
 984        if(optname==MRT_VERSION)
 985                val=0x0305;
 986#ifdef CONFIG_IP_PIMSM
 987        else if(optname==MRT_PIM)
 988                val=mroute_do_pim;
 989#endif
 990        else
 991                val=mroute_do_assert;
 992        if(copy_to_user(optval,&val,olr))
 993                return -EFAULT;
 994        return 0;
 995}
 996
 997/*
 998 *      The IP multicast ioctl support routines.
 999 */
1000 
1001int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1002{
1003        struct sioc_sg_req sr;
1004        struct sioc_vif_req vr;
1005        struct vif_device *vif;
1006        struct mfc_cache *c;
1007        
1008        switch(cmd)
1009        {
1010                case SIOCGETVIFCNT:
1011                        if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1012                                return -EFAULT; 
1013                        if(vr.vifi>=maxvif)
1014                                return -EINVAL;
1015                        read_lock(&mrt_lock);
1016                        vif=&vif_table[vr.vifi];
1017                        if(VIF_EXISTS(vr.vifi)) {
1018                                vr.icount=vif->pkt_in;
1019                                vr.ocount=vif->pkt_out;
1020                                vr.ibytes=vif->bytes_in;
1021                                vr.obytes=vif->bytes_out;
1022                                read_unlock(&mrt_lock);
1023
1024                                if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1025                                        return -EFAULT;
1026                                return 0;
1027                        }
1028                        read_unlock(&mrt_lock);
1029                        return -EADDRNOTAVAIL;
1030                case SIOCGETSGCNT:
1031                        if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1032                                return -EFAULT;
1033
1034                        read_lock(&mrt_lock);
1035                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1036                        if (c) {
1037                                sr.pktcnt = c->mfc_un.res.pkt;
1038                                sr.bytecnt = c->mfc_un.res.bytes;
1039                                sr.wrong_if = c->mfc_un.res.wrong_if;
1040                                read_unlock(&mrt_lock);
1041
1042                                if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1043                                        return -EFAULT;
1044                                return 0;
1045                        }
1046                        read_unlock(&mrt_lock);
1047                        return -EADDRNOTAVAIL;
1048                default:
1049                        return -ENOIOCTLCMD;
1050        }
1051}
1052
1053
1054static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1055{
1056        struct vif_device *v;
1057        int ct;
1058        if (event != NETDEV_UNREGISTER)
1059                return NOTIFY_DONE;
1060        v=&vif_table[0];
1061        for(ct=0;ct<maxvif;ct++,v++) {
1062                if (v->dev==ptr)
1063                        vif_delete(ct);
1064        }
1065        return NOTIFY_DONE;
1066}
1067
1068
1069static struct notifier_block ip_mr_notifier={
1070        ipmr_device_event,
1071        NULL,
1072        0
1073};
1074
1075/*
1076 *      Encapsulate a packet by attaching a valid IPIP header to it.
1077 *      This avoids tunnel drivers and other mess and gives us the speed so
1078 *      important for multicast video.
1079 */
1080 
1081static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1082{
1083        struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1084
1085        iph->version    =       4;
1086        iph->tos        =       skb->nh.iph->tos;
1087        iph->ttl        =       skb->nh.iph->ttl;
1088        iph->frag_off   =       0;
1089        iph->daddr      =       daddr;
1090        iph->saddr      =       saddr;
1091        iph->protocol   =       IPPROTO_IPIP;
1092        iph->ihl        =       5;
1093        iph->tot_len    =       htons(skb->len);
1094        ip_select_ident(iph, skb->dst, NULL);
1095        ip_send_check(iph);
1096
1097        skb->h.ipiph = skb->nh.iph;
1098        skb->nh.iph = iph;
1099        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1100        nf_reset(skb);
1101}
1102
1103static inline int ipmr_forward_finish(struct sk_buff *skb)
1104{
1105        struct ip_options *opt = &(IPCB(skb)->opt);
1106        struct dst_entry *dst = skb->dst;
1107
1108        if (unlikely(opt->optlen))
1109                ip_forward_options(skb);
1110
1111        if (skb->len <= dst->pmtu)
1112                return dst->output(skb);
1113        else
1114                return ip_fragment(skb, dst->output);
1115}
1116
1117/*
1118 *      Processing handlers for ipmr_forward
1119 */
1120
1121static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
1122                           int vifi, int last)
1123{
1124        struct iphdr *iph = skb->nh.iph;
1125        struct vif_device *vif = &vif_table[vifi];
1126        struct net_device *dev;
1127        struct rtable *rt;
1128        int    encap = 0;
1129        struct sk_buff *skb2;
1130
1131        if (vif->dev == NULL)
1132                return;
1133
1134#ifdef CONFIG_IP_PIMSM
1135        if (vif->flags & VIFF_REGISTER) {
1136                vif->pkt_out++;
1137                vif->bytes_out+=skb->len;
1138                ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1139                ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1140                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1141                return;
1142        }
1143#endif
1144
1145        if (vif->flags&VIFF_TUNNEL) {
1146                if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
1147                        return;
1148                encap = sizeof(struct iphdr);
1149        } else {
1150                if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
1151                        return;
1152        }
1153
1154        dev = rt->u.dst.dev;
1155
1156        if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
1157                /* Do not fragment multicasts. Alas, IPv4 does not
1158                   allow to send ICMP, so that packets will disappear
1159                   to blackhole.
1160                 */
1161
1162                IP_INC_STATS_BH(IpFragFails);
1163                ip_rt_put(rt);
1164                return;
1165        }
1166
1167        encap += dev->hard_header_len;
1168
1169        if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
1170                skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
1171        else if (atomic_read(&skb->users) != 1)
1172                skb2 = skb_clone(skb, GFP_ATOMIC);
1173        else {
1174                atomic_inc(&skb->users);
1175                skb2 = skb;
1176        }
1177
1178        if (skb2 == NULL) {
1179                ip_rt_put(rt);
1180                return;
1181        }
1182
1183        vif->pkt_out++;
1184        vif->bytes_out+=skb->len;
1185
1186        dst_release(skb2->dst);
1187        skb2->dst = &rt->u.dst;
1188        iph = skb2->nh.iph;
1189        ip_decrease_ttl(iph);
1190
1191        /* FIXME: forward and output firewalls used to be called here.
1192         * What do we do with netfilter? -- RR */
1193        if (vif->flags & VIFF_TUNNEL) {
1194                ip_encap(skb2, vif->local, vif->remote);
1195                /* FIXME: extra output firewall step used to be here. --RR */
1196                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1197                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
1198        }
1199
1200        IPCB(skb2)->flags |= IPSKB_FORWARDED;
1201
1202        /*
1203         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1204         * not only before forwarding, but after forwarding on all output
1205         * interfaces. It is clear, if mrouter runs a multicasting
1206         * program, it should receive packets not depending to what interface
1207         * program is joined.
1208         * If we will not make it, the program will have to join on all
1209         * interfaces. On the other hand, multihoming host (or router, but
1210         * not mrouter) cannot join to more than one interface - it will
1211         * result in receiving multiple packets.
1212         */
1213        NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev, 
1214                ipmr_forward_finish);
1215}
1216
1217int ipmr_find_vif(struct net_device *dev)
1218{
1219        int ct;
1220        for (ct=maxvif-1; ct>=0; ct--) {
1221                if (vif_table[ct].dev == dev)
1222                        break;
1223        }
1224        return ct;
1225}
1226
1227/* "local" means that we should preserve one skb (for local delivery) */
1228
1229int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1230{
1231        int psend = -1;
1232        int vif, ct;
1233
1234        vif = cache->mfc_parent;
1235        cache->mfc_un.res.pkt++;
1236        cache->mfc_un.res.bytes += skb->len;
1237
1238        /*
1239         * Wrong interface: drop packet and (maybe) send PIM assert.
1240         */
1241        if (vif_table[vif].dev != skb->dev) {
1242                int true_vifi;
1243
1244                if (((struct rtable*)skb->dst)->key.iif == 0) {
1245                        /* It is our own packet, looped back.
1246                           Very complicated situation...
1247
1248                           The best workaround until routing daemons will be
1249                           fixed is not to redistribute packet, if it was
1250                           send through wrong interface. It means, that
1251                           multicast applications WILL NOT work for
1252                           (S,G), which have default multicast route pointing
1253                           to wrong oif. In any case, it is not a good
1254                           idea to use multicasting applications on router.
1255                         */
1256                        goto dont_forward;
1257                }
1258
1259                cache->mfc_un.res.wrong_if++;
1260                true_vifi = ipmr_find_vif(skb->dev);
1261
1262                if (true_vifi >= 0 && mroute_do_assert &&
1263                    /* pimsm uses asserts, when switching from RPT to SPT,
1264                       so that we cannot check that packet arrived on an oif.
1265                       It is bad, but otherwise we would need to move pretty
1266                       large chunk of pimd to kernel. Ough... --ANK
1267                     */
1268                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1269                    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
1270                        cache->mfc_un.res.last_assert = jiffies;
1271                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1272                }
1273                goto dont_forward;
1274        }
1275
1276        vif_table[vif].pkt_in++;
1277        vif_table[vif].bytes_in+=skb->len;
1278
1279        /*
1280         *      Forward the frame
1281         */
1282        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1283                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1284                        if (psend != -1)
1285                                ipmr_queue_xmit(skb, cache, psend, 0);
1286                        psend=ct;
1287                }
1288        }
1289        if (psend != -1)
1290                ipmr_queue_xmit(skb, cache, psend, !local);
1291
1292dont_forward:
1293        if (!local)
1294                kfree_skb(skb);
1295        return 0;
1296}
1297
1298
1299/*
1300 *      Multicast packets for forwarding arrive here
1301 */
1302
1303int ip_mr_input(struct sk_buff *skb)
1304{
1305        struct mfc_cache *cache;
1306        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1307
1308        /* Packet is looped back after forward, it should not be
1309           forwarded second time, but still can be delivered locally.
1310         */
1311        if (IPCB(skb)->flags&IPSKB_FORWARDED)
1312                goto dont_forward;
1313
1314        if (!local) {
1315                    if (IPCB(skb)->opt.router_alert) {
1316                            if (ip_call_ra_chain(skb))
1317                                    return 0;
1318                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1319                            /* IGMPv1 (and broken IGMPv2 implementations sort of
1320                               Cisco IOS <= 11.2(8)) do not put router alert
1321                               option to IGMP packets destined to routable
1322                               groups. It is very bad, because it means
1323                               that we can forward NO IGMP messages.
1324                             */
1325                            read_lock(&mrt_lock);
1326                            if (mroute_socket) {
1327                                    raw_rcv(mroute_socket, skb);
1328                                    read_unlock(&mrt_lock);
1329                                    return 0;
1330                            }
1331                            read_unlock(&mrt_lock);
1332                    }
1333        }
1334
1335        read_lock(&mrt_lock);
1336        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1337
1338        /*
1339         *      No usable cache entry
1340         */
1341        if (cache==NULL) {
1342                int vif;
1343
1344                if (local) {
1345                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1346                        ip_local_deliver(skb);
1347                        if (skb2 == NULL) {
1348                                read_unlock(&mrt_lock);
1349                                return -ENOBUFS;
1350                        }
1351                        skb = skb2;
1352                }
1353
1354                vif = ipmr_find_vif(skb->dev);
1355                if (vif >= 0) {
1356                        int err = ipmr_cache_unresolved(vif, skb);
1357                        read_unlock(&mrt_lock);
1358
1359                        return err;
1360                }
1361                read_unlock(&mrt_lock);
1362                kfree_skb(skb);
1363                return -ENODEV;
1364        }
1365
1366        ip_mr_forward(skb, cache, local);
1367
1368        read_unlock(&mrt_lock);
1369
1370        if (local)
1371                return ip_local_deliver(skb);
1372
1373        return 0;
1374
1375dont_forward:
1376        if (local)
1377                return ip_local_deliver(skb);
1378        kfree_skb(skb);
1379        return 0;
1380}
1381
1382#ifdef CONFIG_IP_PIMSM_V1
1383/*
1384 * Handle IGMP messages of PIMv1
1385 */
1386
1387int pim_rcv_v1(struct sk_buff * skb)
1388{
1389        struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
1390        struct iphdr   *encap;
1391        struct net_device  *reg_dev = NULL;
1392
1393        if (skb_is_nonlinear(skb)) {
1394                if (skb_linearize(skb, GFP_ATOMIC) != 0) {
1395                        kfree_skb(skb);
1396                        return -ENOMEM;
1397                }
1398                pim = (struct igmphdr*)skb->h.raw;
1399        }
1400
1401        if (!mroute_do_pim ||
1402            skb->len < sizeof(*pim) + sizeof(*encap) ||
1403            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
1404                kfree_skb(skb);
1405                return -EINVAL;
1406        }
1407
1408        encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1409        /*
1410           Check that:
1411           a. packet is really destinted to a multicast group
1412           b. packet is not a NULL-REGISTER
1413           c. packet is not truncated
1414         */
1415        if (!MULTICAST(encap->daddr) ||
1416            ntohs(encap->tot_len) == 0 ||
1417            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
1418                kfree_skb(skb);
1419                return -EINVAL;
1420        }
1421
1422        read_lock(&mrt_lock);
1423        if (reg_vif_num >= 0)
1424                reg_dev = vif_table[reg_vif_num].dev;
1425        if (reg_dev)
1426                dev_hold(reg_dev);
1427        read_unlock(&mrt_lock);
1428
1429        if (reg_dev == NULL) {
1430                kfree_skb(skb);
1431                return -EINVAL;
1432        }
1433
1434        skb->mac.raw = skb->nh.raw;
1435        skb_pull(skb, (u8*)encap - skb->data);
1436        skb->nh.iph = (struct iphdr *)skb->data;
1437        skb->dev = reg_dev;
1438        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1439        skb->protocol = htons(ETH_P_IP);
1440        skb->ip_summed = 0;
1441        skb->pkt_type = PACKET_HOST;
1442        dst_release(skb->dst);
1443        skb->dst = NULL;
1444        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1445        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1446        nf_reset(skb);
1447        netif_rx(skb);
1448        dev_put(reg_dev);
1449        return 0;
1450}
1451#endif
1452
1453#ifdef CONFIG_IP_PIMSM_V2
1454int pim_rcv(struct sk_buff * skb)
1455{
1456        struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
1457        struct iphdr   *encap;
1458        struct net_device  *reg_dev = NULL;
1459
1460        if (skb_is_nonlinear(skb)) {
1461                if (skb_linearize(skb, GFP_ATOMIC) != 0) {
1462                        kfree_skb(skb);
1463                        return -ENOMEM;
1464                }
1465                pim = (struct pimreghdr*)skb->h.raw;
1466        }
1467
1468        if (skb->len < sizeof(*pim) + sizeof(*encap) ||
1469            pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1470            (pim->flags&PIM_NULL_REGISTER) ||
1471            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1472             ip_compute_csum((void *)pim, skb->len))) {
1473                kfree_skb(skb);
1474                return -EINVAL;
1475        }
1476
1477        /* check if the inner packet is destined to mcast group */
1478        encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1479        if (!MULTICAST(encap->daddr) ||
1480            ntohs(encap->tot_len) == 0 ||
1481            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
1482                kfree_skb(skb);
1483                return -EINVAL;
1484        }
1485
1486        read_lock(&mrt_lock);
1487        if (reg_vif_num >= 0)
1488                reg_dev = vif_table[reg_vif_num].dev;
1489        if (reg_dev)
1490                dev_hold(reg_dev);
1491        read_unlock(&mrt_lock);
1492
1493        if (reg_dev == NULL) {
1494                kfree_skb(skb);
1495                return -EINVAL;
1496        }
1497
1498        skb->mac.raw = skb->nh.raw;
1499        skb_pull(skb, (u8*)encap - skb->data);
1500        skb->nh.iph = (struct iphdr *)skb->data;
1501        skb->dev = reg_dev;
1502        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1503        skb->protocol = htons(ETH_P_IP);
1504        skb->ip_summed = 0;
1505        skb->pkt_type = PACKET_HOST;
1506        dst_release(skb->dst);
1507        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1508        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1509        skb->dst = NULL;
1510        nf_reset(skb);
1511        netif_rx(skb);
1512        dev_put(reg_dev);
1513        return 0;
1514}
1515#endif
1516
1517static int
1518ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1519{
1520        int ct;
1521        struct rtnexthop *nhp;
1522        struct net_device *dev = vif_table[c->mfc_parent].dev;
1523        u8 *b = skb->tail;
1524        struct rtattr *mp_head;
1525
1526        if (dev)
1527                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1528
1529        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1530
1531        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1532                if (c->mfc_un.res.ttls[ct] < 255) {
1533                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1534                                goto rtattr_failure;
1535                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1536                        nhp->rtnh_flags = 0;
1537                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1538                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1539                        nhp->rtnh_len = sizeof(*nhp);
1540                }
1541        }
1542        mp_head->rta_type = RTA_MULTIPATH;
1543        mp_head->rta_len = skb->tail - (u8*)mp_head;
1544        rtm->rtm_type = RTN_MULTICAST;
1545        return 1;
1546
1547rtattr_failure:
1548        skb_trim(skb, b - skb->data);
1549        return -EMSGSIZE;
1550}
1551
1552int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1553{
1554        int err;
1555        struct mfc_cache *cache;
1556        struct rtable *rt = (struct rtable*)skb->dst;
1557
1558        read_lock(&mrt_lock);
1559        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1560
1561        if (cache==NULL) {
1562                struct net_device *dev;
1563                int vif;
1564
1565                if (nowait) {
1566                        read_unlock(&mrt_lock);
1567                        return -EAGAIN;
1568                }
1569
1570                dev = skb->dev;
1571                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1572                        read_unlock(&mrt_lock);
1573                        return -ENODEV;
1574                }
1575                skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1576                skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1577                skb->nh.iph->saddr = rt->rt_src;
1578                skb->nh.iph->daddr = rt->rt_dst;
1579                skb->nh.iph->version = 0;
1580                err = ipmr_cache_unresolved(vif, skb);
1581                read_unlock(&mrt_lock);
1582                return err;
1583        }
1584
1585        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1586                cache->mfc_flags |= MFC_NOTIFY;
1587        err = ipmr_fill_mroute(skb, cache, rtm);
1588        read_unlock(&mrt_lock);
1589        return err;
1590}
1591
1592#ifdef CONFIG_PROC_FS   
/*
 *	The /proc interfaces to multicast routing: the "ip_mr_vif" and
 *	"ip_mr_cache" entries registered with proc_net_create().
 */
1596 
1597static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
1598{
1599        struct vif_device *vif;
1600        int len=0;
1601        off_t pos=0;
1602        off_t begin=0;
1603        int size;
1604        int ct;
1605
1606        len += sprintf(buffer,
1607                 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1608        pos=len;
1609  
1610        read_lock(&mrt_lock);
1611        for (ct=0;ct<maxvif;ct++) 
1612        {
1613                char *name = "none";
1614                vif=&vif_table[ct];
1615                if(!VIF_EXISTS(ct))
1616                        continue;
1617                if (vif->dev)
1618                        name = vif->dev->name;
1619                size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1620                        ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
1621                        vif->flags, vif->local, vif->remote);
1622                len+=size;
1623                pos+=size;
1624                if(pos<offset)
1625                {
1626                        len=0;
1627                        begin=pos;
1628                }
1629                if(pos>offset+length)
1630                        break;
1631        }
1632        read_unlock(&mrt_lock);
1633        
1634        *start=buffer+(offset-begin);
1635        len-=(offset-begin);
1636        if(len>length)
1637                len=length;
1638        if (len<0)
1639                len = 0;
1640        return len;
1641}
1642
1643static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
1644{
1645        struct mfc_cache *mfc;
1646        int len=0;
1647        off_t pos=0;
1648        off_t begin=0;
1649        int size;
1650        int ct;
1651
1652        len += sprintf(buffer,
1653                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1654        pos=len;
1655
1656        read_lock(&mrt_lock);
1657        for (ct=0;ct<MFC_LINES;ct++) 
1658        {
1659                for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
1660                {
1661                        int n;
1662
1663                        /*
1664                         *      Interface forwarding map
1665                         */
1666                        size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
1667                                (unsigned long)mfc->mfc_mcastgrp,
1668                                (unsigned long)mfc->mfc_origin,
1669                                mfc->mfc_parent,
1670                                mfc->mfc_un.res.pkt,
1671                                mfc->mfc_un.res.bytes,
1672                                mfc->mfc_un.res.wrong_if);
1673                        for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
1674                        {
1675                                if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
1676                                        size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
1677                        }
1678                        size += sprintf(buffer+len+size, "\n");
1679                        len+=size;
1680                        pos+=size;
1681                        if(pos<offset)
1682                        {
1683                                len=0;
1684                                begin=pos;
1685                        }
1686                        if(pos>offset+length)
1687                                goto done;
1688                }
1689        }
1690
1691        spin_lock_bh(&mfc_unres_lock);
1692        for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
1693                size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
1694                               (unsigned long)mfc->mfc_mcastgrp,
1695                               (unsigned long)mfc->mfc_origin,
1696                               -1,
1697                                (long)mfc->mfc_un.unres.unresolved.qlen,
1698                                0L, 0L);
1699                len+=size;
1700                pos+=size;
1701                if(pos<offset)
1702                {
1703                        len=0;
1704                        begin=pos;
1705                }
1706                if(pos>offset+length)
1707                        break;
1708        }
1709        spin_unlock_bh(&mfc_unres_lock);
1710
1711done:
1712        read_unlock(&mrt_lock);
1713        *start=buffer+(offset-begin);
1714        len-=(offset-begin);
1715        if(len>length)
1716                len=length;
1717        if (len < 0) {
1718                len = 0;
1719        }
1720        return len;
1721}
1722
1723#endif  
1724
1725#ifdef CONFIG_IP_PIMSM_V2
1726struct inet_protocol pim_protocol = 
1727{
1728        pim_rcv,                /* PIM handler          */
1729        NULL,                   /* PIM error control    */
1730        NULL,                   /* next                 */
1731        IPPROTO_PIM,            /* protocol ID          */
1732        0,                      /* copy                 */
1733        NULL,                   /* data                 */
1734        "PIM"                   /* name                 */
1735};
1736#endif
1737
1738
1739/*
1740 *      Setup for IP multicast routing
1741 */
1742 
1743void __init ip_mr_init(void)
1744{
1745        printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1746        mrt_cachep = kmem_cache_create("ip_mrt_cache",
1747                                       sizeof(struct mfc_cache),
1748                                       0, SLAB_HWCACHE_ALIGN,
1749                                       NULL, NULL);
1750        init_timer(&ipmr_expire_timer);
1751        ipmr_expire_timer.function=ipmr_expire_process;
1752        register_netdevice_notifier(&ip_mr_notifier);
1753#ifdef CONFIG_PROC_FS   
1754        proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1755        proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1756#endif  
1757}
1758
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.