linux-bk/net/ipv4/ipmr.c
<<
>>
Prefs
   1/*
   2 *      IP multicast routing support for mrouted 3.6/3.8
   3 *
   4 *              (c) 1995 Alan Cox, <alan@redhat.com>
   5 *        Linux Consultancy and Custom Driver Development
   6 *
   7 *      This program is free software; you can redistribute it and/or
   8 *      modify it under the terms of the GNU General Public License
   9 *      as published by the Free Software Foundation; either version
  10 *      2 of the License, or (at your option) any later version.
  11 *
  12 *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
  13 *
  14 *      Fixes:
  15 *      Michael Chastain        :       Incorrect size of copying.
  16 *      Alan Cox                :       Added the cache manager code
  17 *      Alan Cox                :       Fixed the clone/copy bug and device race.
  18 *      Mike McLagan            :       Routing by source
  19 *      Malcolm Beattie         :       Buffer handling fixes.
  20 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  21 *      SVR Anand               :       Fixed several multicast bugs and problems.
  22 *      Alexey Kuznetsov        :       Status, optimisations and more.
  23 *      Brad Parker             :       Better behaviour on mrouted upcall
  24 *                                      overflow.
  25 *      Carlos Picoto           :       PIMv1 Support
  26 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
   27 *                                      Relax this requirement to work with older peers.
  28 *
  29 */
  30
  31#include <linux/config.h>
  32#include <asm/system.h>
  33#include <asm/uaccess.h>
  34#include <linux/types.h>
  35#include <linux/sched.h>
  36#include <linux/errno.h>
  37#include <linux/timer.h>
  38#include <linux/mm.h>
  39#include <linux/kernel.h>
  40#include <linux/fcntl.h>
  41#include <linux/stat.h>
  42#include <linux/socket.h>
  43#include <linux/in.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/inetdevice.h>
  47#include <linux/igmp.h>
  48#include <linux/proc_fs.h>
  49#include <linux/seq_file.h>
  50#include <linux/mroute.h>
  51#include <linux/init.h>
  52#include <net/ip.h>
  53#include <net/protocol.h>
  54#include <linux/skbuff.h>
  55#include <net/sock.h>
  56#include <net/icmp.h>
  57#include <net/udp.h>
  58#include <net/raw.h>
  59#include <linux/notifier.h>
  60#include <linux/if_arp.h>
  61#include <linux/netfilter_ipv4.h>
  62#include <net/ipip.h>
  63#include <net/checksum.h>
  64
  65#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  66#define CONFIG_IP_PIMSM 1
  67#endif
  68
  69static struct sock *mroute_socket;
  70
  71
  72/* Big lock, protecting vif table, mrt cache and mroute socket state.
  73   Note that the changes are semaphored via rtnl_lock.
  74 */
  75
  76static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
  77
  78/*
  79 *      Multicast router control variables
  80 */
  81
  82static struct vif_device vif_table[MAXVIFS];            /* Devices              */
  83static int maxvif;
  84
  85#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
  86
  87static int mroute_do_assert;                            /* Set in PIM assert    */
  88static int mroute_do_pim;
  89
  90static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */
  91
  92static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
  93static atomic_t cache_resolve_queue_len;                /* Size of unresolved   */
  94
  95/* Special spinlock for queue of unresolved entries */
  96static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
  97
  98/* We return to original Alan's scheme. Hash table of resolved
  99   entries is changed only in process context and protected
 100   with weak lock mrt_lock. Queue of unresolved entries is protected
 101   with strong spinlock mfc_unres_lock.
 102
 103   In this case data path is free of exclusive locks at all.
 104 */
 105
 106static kmem_cache_t *mrt_cachep;
 107
 108static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
 109static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
 110static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 111
 112static struct inet_protocol pim_protocol;
 113
 114static struct timer_list ipmr_expire_timer;
 115
 116/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 117
 118static
 119struct net_device *ipmr_new_tunnel(struct vifctl *v)
 120{
 121        struct net_device  *dev;
 122
 123        dev = __dev_get_by_name("tunl0");
 124
 125        if (dev) {
 126                int err;
 127                struct ifreq ifr;
 128                mm_segment_t    oldfs;
 129                struct ip_tunnel_parm p;
 130                struct in_device  *in_dev;
 131
 132                memset(&p, 0, sizeof(p));
 133                p.iph.daddr = v->vifc_rmt_addr.s_addr;
 134                p.iph.saddr = v->vifc_lcl_addr.s_addr;
 135                p.iph.version = 4;
 136                p.iph.ihl = 5;
 137                p.iph.protocol = IPPROTO_IPIP;
 138                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 139                ifr.ifr_ifru.ifru_data = (void*)&p;
 140
 141                oldfs = get_fs(); set_fs(KERNEL_DS);
 142                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 143                set_fs(oldfs);
 144
 145                dev = NULL;
 146
 147                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
 148                        dev->flags |= IFF_MULTICAST;
 149
 150                        in_dev = __in_dev_get(dev);
 151                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
 152                                goto failure;
 153                        in_dev->cnf.rp_filter = 0;
 154
 155                        if (dev_open(dev))
 156                                goto failure;
 157                }
 158        }
 159        return dev;
 160
 161failure:
 162        /* allow the register to be completed before unregistering. */
 163        rtnl_unlock();
 164        rtnl_lock();
 165
 166        unregister_netdevice(dev);
 167        return NULL;
 168}
 169
 170#ifdef CONFIG_IP_PIMSM
 171
 172static int reg_vif_num = -1;
 173
 174static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 175{
 176        read_lock(&mrt_lock);
 177        ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
 178        ((struct net_device_stats*)dev->priv)->tx_packets++;
 179        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 180        read_unlock(&mrt_lock);
 181        kfree_skb(skb);
 182        return 0;
 183}
 184
 185static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
 186{
 187        return (struct net_device_stats*)dev->priv;
 188}
 189
 190static void reg_vif_setup(struct net_device *dev)
 191{
 192        dev->type               = ARPHRD_PIMREG;
 193        dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
 194        dev->flags              = IFF_NOARP;
 195        dev->hard_start_xmit    = reg_vif_xmit;
 196        dev->get_stats          = reg_vif_get_stats;
 197        dev->destructor         = free_netdev;
 198}
 199
 200static struct net_device *ipmr_reg_vif(void)
 201{
 202        struct net_device *dev;
 203        struct in_device *in_dev;
 204
 205        dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
 206                           reg_vif_setup);
 207
 208        if (register_netdevice(dev)) {
 209                kfree(dev);
 210                return NULL;
 211        }
 212        dev->iflink = 0;
 213
 214        if ((in_dev = inetdev_init(dev)) == NULL)
 215                goto failure;
 216
 217        in_dev->cnf.rp_filter = 0;
 218
 219        if (dev_open(dev))
 220                goto failure;
 221
 222        return dev;
 223
 224failure:
 225        /* allow the register to be completed before unregistering. */
 226        rtnl_unlock();
 227        rtnl_lock();
 228
 229        unregister_netdevice(dev);
 230        return NULL;
 231}
 232#endif
 233
 234/*
 235 *      Delete a VIF entry
 236 */
 237 
 238static int vif_delete(int vifi)
 239{
 240        struct vif_device *v;
 241        struct net_device *dev;
 242        struct in_device *in_dev;
 243
 244        if (vifi < 0 || vifi >= maxvif)
 245                return -EADDRNOTAVAIL;
 246
 247        v = &vif_table[vifi];
 248
 249        write_lock_bh(&mrt_lock);
 250        dev = v->dev;
 251        v->dev = NULL;
 252
 253        if (!dev) {
 254                write_unlock_bh(&mrt_lock);
 255                return -EADDRNOTAVAIL;
 256        }
 257
 258#ifdef CONFIG_IP_PIMSM
 259        if (vifi == reg_vif_num)
 260                reg_vif_num = -1;
 261#endif
 262
 263        if (vifi+1 == maxvif) {
 264                int tmp;
 265                for (tmp=vifi-1; tmp>=0; tmp--) {
 266                        if (VIF_EXISTS(tmp))
 267                                break;
 268                }
 269                maxvif = tmp+1;
 270        }
 271
 272        write_unlock_bh(&mrt_lock);
 273
 274        dev_set_allmulti(dev, -1);
 275
 276        if ((in_dev = __in_dev_get(dev)) != NULL) {
 277                in_dev->cnf.mc_forwarding--;
 278                ip_rt_multicast_event(in_dev);
 279        }
 280
 281        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 282                unregister_netdevice(dev);
 283
 284        dev_put(dev);
 285        return 0;
 286}
 287
 288/* Destroy an unresolved cache entry, killing queued skbs
 289   and reporting error to netlink readers.
 290 */
 291
 292static void ipmr_destroy_unres(struct mfc_cache *c)
 293{
 294        struct sk_buff *skb;
 295
 296        atomic_dec(&cache_resolve_queue_len);
 297
 298        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
 299                if (skb->nh.iph->version == 0) {
 300                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 301                        nlh->nlmsg_type = NLMSG_ERROR;
 302                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 303                        skb_trim(skb, nlh->nlmsg_len);
 304                        ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
 305                        netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
 306                } else
 307                        kfree_skb(skb);
 308        }
 309
 310        kmem_cache_free(mrt_cachep, c);
 311}
 312
 313
 314/* Single timer process for all the unresolved queue. */
 315
 316static void ipmr_expire_process(unsigned long dummy)
 317{
 318        unsigned long now;
 319        unsigned long expires;
 320        struct mfc_cache *c, **cp;
 321
 322        if (!spin_trylock(&mfc_unres_lock)) {
 323                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 324                return;
 325        }
 326
 327        if (atomic_read(&cache_resolve_queue_len) == 0)
 328                goto out;
 329
 330        now = jiffies;
 331        expires = 10*HZ;
 332        cp = &mfc_unres_queue;
 333
 334        while ((c=*cp) != NULL) {
 335                if (time_after(c->mfc_un.unres.expires, now)) {
 336                        unsigned long interval = c->mfc_un.unres.expires - now;
 337                        if (interval < expires)
 338                                expires = interval;
 339                        cp = &c->next;
 340                        continue;
 341                }
 342
 343                *cp = c->next;
 344
 345                ipmr_destroy_unres(c);
 346        }
 347
 348        if (atomic_read(&cache_resolve_queue_len))
 349                mod_timer(&ipmr_expire_timer, jiffies + expires);
 350
 351out:
 352        spin_unlock(&mfc_unres_lock);
 353}
 354
 355/* Fill oifs list. It is called under write locked mrt_lock. */
 356
 357static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
 358{
 359        int vifi;
 360
 361        cache->mfc_un.res.minvif = MAXVIFS;
 362        cache->mfc_un.res.maxvif = 0;
 363        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 364
 365        for (vifi=0; vifi<maxvif; vifi++) {
 366                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
 367                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 368                        if (cache->mfc_un.res.minvif > vifi)
 369                                cache->mfc_un.res.minvif = vifi;
 370                        if (cache->mfc_un.res.maxvif <= vifi)
 371                                cache->mfc_un.res.maxvif = vifi + 1;
 372                }
 373        }
 374}
 375
 376static int vif_add(struct vifctl *vifc, int mrtsock)
 377{
 378        int vifi = vifc->vifc_vifi;
 379        struct vif_device *v = &vif_table[vifi];
 380        struct net_device *dev;
 381        struct in_device *in_dev;
 382
 383        /* Is vif busy ? */
 384        if (VIF_EXISTS(vifi))
 385                return -EADDRINUSE;
 386
 387        switch (vifc->vifc_flags) {
 388#ifdef CONFIG_IP_PIMSM
 389        case VIFF_REGISTER:
 390                /*
 391                 * Special Purpose VIF in PIM
 392                 * All the packets will be sent to the daemon
 393                 */
 394                if (reg_vif_num >= 0)
 395                        return -EADDRINUSE;
 396                dev = ipmr_reg_vif();
 397                if (!dev)
 398                        return -ENOBUFS;
 399                break;
 400#endif
 401        case VIFF_TUNNEL:       
 402                dev = ipmr_new_tunnel(vifc);
 403                if (!dev)
 404                        return -ENOBUFS;
 405                break;
 406        case 0:
 407                dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
 408                if (!dev)
 409                        return -EADDRNOTAVAIL;
 410                __dev_put(dev);
 411                break;
 412        default:
 413                return -EINVAL;
 414        }
 415
 416        if ((in_dev = __in_dev_get(dev)) == NULL)
 417                return -EADDRNOTAVAIL;
 418        in_dev->cnf.mc_forwarding++;
 419        dev_set_allmulti(dev, +1);
 420        ip_rt_multicast_event(in_dev);
 421
 422        /*
 423         *      Fill in the VIF structures
 424         */
 425        v->rate_limit=vifc->vifc_rate_limit;
 426        v->local=vifc->vifc_lcl_addr.s_addr;
 427        v->remote=vifc->vifc_rmt_addr.s_addr;
 428        v->flags=vifc->vifc_flags;
 429        if (!mrtsock)
 430                v->flags |= VIFF_STATIC;
 431        v->threshold=vifc->vifc_threshold;
 432        v->bytes_in = 0;
 433        v->bytes_out = 0;
 434        v->pkt_in = 0;
 435        v->pkt_out = 0;
 436        v->link = dev->ifindex;
 437        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 438                v->link = dev->iflink;
 439
 440        /* And finish update writing critical data */
 441        write_lock_bh(&mrt_lock);
 442        dev_hold(dev);
 443        v->dev=dev;
 444#ifdef CONFIG_IP_PIMSM
 445        if (v->flags&VIFF_REGISTER)
 446                reg_vif_num = vifi;
 447#endif
 448        if (vifi+1 > maxvif)
 449                maxvif = vifi+1;
 450        write_unlock_bh(&mrt_lock);
 451        return 0;
 452}
 453
 454static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
 455{
 456        int line=MFC_HASH(mcastgrp,origin);
 457        struct mfc_cache *c;
 458
 459        for (c=mfc_cache_array[line]; c; c = c->next) {
 460                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 461                        break;
 462        }
 463        return c;
 464}
 465
 466/*
 467 *      Allocate a multicast cache entry
 468 */
 469static struct mfc_cache *ipmr_cache_alloc(void)
 470{
 471        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
 472        if(c==NULL)
 473                return NULL;
 474        memset(c, 0, sizeof(*c));
 475        c->mfc_un.res.minvif = MAXVIFS;
 476        return c;
 477}
 478
 479static struct mfc_cache *ipmr_cache_alloc_unres(void)
 480{
 481        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
 482        if(c==NULL)
 483                return NULL;
 484        memset(c, 0, sizeof(*c));
 485        skb_queue_head_init(&c->mfc_un.unres.unresolved);
 486        c->mfc_un.unres.expires = jiffies + 10*HZ;
 487        return c;
 488}
 489
 490/*
 491 *      A cache entry has gone into a resolved state from queued
 492 */
 493 
 494static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 495{
 496        struct sk_buff *skb;
 497
 498        /*
 499         *      Play the pending entries through our router
 500         */
 501
 502        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 503                if (skb->nh.iph->version == 0) {
 504                        int err;
 505                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 506
 507                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 508                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
 509                        } else {
 510                                nlh->nlmsg_type = NLMSG_ERROR;
 511                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 512                                skb_trim(skb, nlh->nlmsg_len);
 513                                ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 514                        }
 515                        err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
 516                } else
 517                        ip_mr_forward(skb, c, 0);
 518        }
 519}
 520
 521/*
 522 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 523 *      expects the following bizarre scheme.
 524 *
 525 *      Called under mrt_lock.
 526 */
 527 
 528static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 529{
 530        struct sk_buff *skb;
 531        int ihl = pkt->nh.iph->ihl<<2;
 532        struct igmphdr *igmp;
 533        struct igmpmsg *msg;
 534        int ret;
 535
 536#ifdef CONFIG_IP_PIMSM
 537        if (assert == IGMPMSG_WHOLEPKT)
 538                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 539        else
 540#endif
 541                skb = alloc_skb(128, GFP_ATOMIC);
 542
 543        if(!skb)
 544                return -ENOBUFS;
 545
 546#ifdef CONFIG_IP_PIMSM
 547        if (assert == IGMPMSG_WHOLEPKT) {
 548                /* Ugly, but we have no choice with this interface.
 549                   Duplicate old header, fix ihl, length etc.
 550                   And all this only to mangle msg->im_msgtype and
 551                   to set msg->im_mbz to "mbz" :-)
 552                 */
 553                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
 554                skb->nh.raw = skb->h.raw = (u8*)msg;
 555                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
 556                msg->im_msgtype = IGMPMSG_WHOLEPKT;
 557                msg->im_mbz = 0;
 558                msg->im_vif = reg_vif_num;
 559                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
 560                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
 561        } else 
 562#endif
 563        {       
 564                
 565        /*
 566         *      Copy the IP header
 567         */
 568
 569        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
 570        memcpy(skb->data,pkt->data,ihl);
 571        skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
 572        msg = (struct igmpmsg*)skb->nh.iph;
 573        msg->im_vif = vifi;
 574        skb->dst = dst_clone(pkt->dst);
 575
 576        /*
 577         *      Add our header
 578         */
 579
 580        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
 581        igmp->type      =
 582        msg->im_msgtype = assert;
 583        igmp->code      =       0;
 584        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
 585        skb->h.raw = skb->nh.raw;
 586        }
 587
 588        if (mroute_socket == NULL) {
 589                kfree_skb(skb);
 590                return -EINVAL;
 591        }
 592
 593        /*
 594         *      Deliver to mrouted
 595         */
 596        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
 597                if (net_ratelimit())
 598                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 599                kfree_skb(skb);
 600        }
 601
 602        return ret;
 603}
 604
 605/*
 606 *      Queue a packet for resolution. It gets locked cache entry!
 607 */
 608 
 609static int
 610ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 611{
 612        int err;
 613        struct mfc_cache *c;
 614
 615        spin_lock_bh(&mfc_unres_lock);
 616        for (c=mfc_unres_queue; c; c=c->next) {
 617                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
 618                    c->mfc_origin == skb->nh.iph->saddr)
 619                        break;
 620        }
 621
 622        if (c == NULL) {
 623                /*
 624                 *      Create a new entry if allowable
 625                 */
 626
 627                if (atomic_read(&cache_resolve_queue_len)>=10 ||
 628                    (c=ipmr_cache_alloc_unres())==NULL) {
 629                        spin_unlock_bh(&mfc_unres_lock);
 630
 631                        kfree_skb(skb);
 632                        return -ENOBUFS;
 633                }
 634
 635                /*
 636                 *      Fill in the new cache entry
 637                 */
 638                c->mfc_parent=-1;
 639                c->mfc_origin=skb->nh.iph->saddr;
 640                c->mfc_mcastgrp=skb->nh.iph->daddr;
 641
 642                /*
 643                 *      Reflect first query at mrouted.
 644                 */
 645                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
 646                        /* If the report failed throw the cache entry 
 647                           out - Brad Parker
 648                         */
 649                        spin_unlock_bh(&mfc_unres_lock);
 650
 651                        kmem_cache_free(mrt_cachep, c);
 652                        kfree_skb(skb);
 653                        return err;
 654                }
 655
 656                atomic_inc(&cache_resolve_queue_len);
 657                c->next = mfc_unres_queue;
 658                mfc_unres_queue = c;
 659
 660                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 661        }
 662
 663        /*
 664         *      See if we can append the packet
 665         */
 666        if (c->mfc_un.unres.unresolved.qlen>3) {
 667                kfree_skb(skb);
 668                err = -ENOBUFS;
 669        } else {
 670                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
 671                err = 0;
 672        }
 673
 674        spin_unlock_bh(&mfc_unres_lock);
 675        return err;
 676}
 677
 678/*
 679 *      MFC cache manipulation by user space mroute daemon
 680 */
 681
 682static int ipmr_mfc_delete(struct mfcctl *mfc)
 683{
 684        int line;
 685        struct mfc_cache *c, **cp;
 686
 687        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 688
 689        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 690                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 691                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 692                        write_lock_bh(&mrt_lock);
 693                        *cp = c->next;
 694                        write_unlock_bh(&mrt_lock);
 695
 696                        kmem_cache_free(mrt_cachep, c);
 697                        return 0;
 698                }
 699        }
 700        return -ENOENT;
 701}
 702
 703static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 704{
 705        int line;
 706        struct mfc_cache *uc, *c, **cp;
 707
 708        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 709
 710        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 711                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 712                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 713                        break;
 714        }
 715
 716        if (c != NULL) {
 717                write_lock_bh(&mrt_lock);
 718                c->mfc_parent = mfc->mfcc_parent;
 719                ipmr_update_threshoulds(c, mfc->mfcc_ttls);
 720                if (!mrtsock)
 721                        c->mfc_flags |= MFC_STATIC;
 722                write_unlock_bh(&mrt_lock);
 723                return 0;
 724        }
 725
 726        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
 727                return -EINVAL;
 728
 729        c=ipmr_cache_alloc();
 730        if (c==NULL)
 731                return -ENOMEM;
 732
 733        c->mfc_origin=mfc->mfcc_origin.s_addr;
 734        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
 735        c->mfc_parent=mfc->mfcc_parent;
 736        ipmr_update_threshoulds(c, mfc->mfcc_ttls);
 737        if (!mrtsock)
 738                c->mfc_flags |= MFC_STATIC;
 739
 740        write_lock_bh(&mrt_lock);
 741        c->next = mfc_cache_array[line];
 742        mfc_cache_array[line] = c;
 743        write_unlock_bh(&mrt_lock);
 744
 745        /*
 746         *      Check to see if we resolved a queued list. If so we
 747         *      need to send on the frames and tidy up.
 748         */
 749        spin_lock_bh(&mfc_unres_lock);
 750        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 751             cp = &uc->next) {
 752                if (uc->mfc_origin == c->mfc_origin &&
 753                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 754                        *cp = uc->next;
 755                        if (atomic_dec_and_test(&cache_resolve_queue_len))
 756                                del_timer(&ipmr_expire_timer);
 757                        break;
 758                }
 759        }
 760        spin_unlock_bh(&mfc_unres_lock);
 761
 762        if (uc) {
 763                ipmr_cache_resolve(uc, c);
 764                kmem_cache_free(mrt_cachep, uc);
 765        }
 766        return 0;
 767}
 768
 769/*
 770 *      Close the multicast socket, and clear the vif tables etc
 771 */
 772 
 773static void mroute_clean_tables(struct sock *sk)
 774{
 775        int i;
 776                
 777        /*
 778         *      Shut down all active vif entries
 779         */
 780        for(i=0; i<maxvif; i++) {
 781                if (!(vif_table[i].flags&VIFF_STATIC))
 782                        vif_delete(i);
 783        }
 784
 785        /*
 786         *      Wipe the cache
 787         */
 788        for (i=0;i<MFC_LINES;i++) {
 789                struct mfc_cache *c, **cp;
 790
 791                cp = &mfc_cache_array[i];
 792                while ((c = *cp) != NULL) {
 793                        if (c->mfc_flags&MFC_STATIC) {
 794                                cp = &c->next;
 795                                continue;
 796                        }
 797                        write_lock_bh(&mrt_lock);
 798                        *cp = c->next;
 799                        write_unlock_bh(&mrt_lock);
 800
 801                        kmem_cache_free(mrt_cachep, c);
 802                }
 803        }
 804
 805        if (atomic_read(&cache_resolve_queue_len) != 0) {
 806                struct mfc_cache *c;
 807
 808                spin_lock_bh(&mfc_unres_lock);
 809                while (mfc_unres_queue != NULL) {
 810                        c = mfc_unres_queue;
 811                        mfc_unres_queue = c->next;
 812                        spin_unlock_bh(&mfc_unres_lock);
 813
 814                        ipmr_destroy_unres(c);
 815
 816                        spin_lock_bh(&mfc_unres_lock);
 817                }
 818                spin_unlock_bh(&mfc_unres_lock);
 819        }
 820}
 821
 822static void mrtsock_destruct(struct sock *sk)
 823{
 824        rtnl_lock();
 825        if (sk == mroute_socket) {
 826                ipv4_devconf.mc_forwarding--;
 827
 828                write_lock_bh(&mrt_lock);
 829                mroute_socket=NULL;
 830                write_unlock_bh(&mrt_lock);
 831
 832                mroute_clean_tables(sk);
 833        }
 834        rtnl_unlock();
 835}
 836
 837/*
 838 *      Socket options and virtual interface manipulation. The whole
 839 *      virtual interface system is a complete heap, but unfortunately
 840 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 841 *      MOSPF/PIM router set up we can clean this up.
 842 */
 843 
 844int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
 845{
 846        int ret;
 847        struct vifctl vif;
 848        struct mfcctl mfc;
 849        
 850        if(optname!=MRT_INIT)
 851        {
 852                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
 853                        return -EACCES;
 854        }
 855
 856        switch(optname)
 857        {
 858                case MRT_INIT:
 859                        if (sk->sk_type != SOCK_RAW ||
 860                            inet_sk(sk)->num != IPPROTO_IGMP)
 861                                return -EOPNOTSUPP;
 862                        if(optlen!=sizeof(int))
 863                                return -ENOPROTOOPT;
 864
 865                        rtnl_lock();
 866                        if (mroute_socket) {
 867                                rtnl_unlock();
 868                                return -EADDRINUSE;
 869                        }
 870
 871                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
 872                        if (ret == 0) {
 873                                write_lock_bh(&mrt_lock);
 874                                mroute_socket=sk;
 875                                write_unlock_bh(&mrt_lock);
 876
 877                                ipv4_devconf.mc_forwarding++;
 878                        }
 879                        rtnl_unlock();
 880                        return ret;
 881                case MRT_DONE:
 882                        if (sk!=mroute_socket)
 883                                return -EACCES;
 884                        return ip_ra_control(sk, 0, NULL);
 885                case MRT_ADD_VIF:
 886                case MRT_DEL_VIF:
 887                        if(optlen!=sizeof(vif))
 888                                return -EINVAL;
 889                        if (copy_from_user(&vif,optval,sizeof(vif)))
 890                                return -EFAULT; 
 891                        if(vif.vifc_vifi >= MAXVIFS)
 892                                return -ENFILE;
 893                        rtnl_lock();
 894                        if (optname==MRT_ADD_VIF) {
 895                                ret = vif_add(&vif, sk==mroute_socket);
 896                        } else {
 897                                ret = vif_delete(vif.vifc_vifi);
 898                        }
 899                        rtnl_unlock();
 900                        return ret;
 901
 902                /*
 903                 *      Manipulate the forwarding caches. These live
 904                 *      in a sort of kernel/user symbiosis.
 905                 */
 906                case MRT_ADD_MFC:
 907                case MRT_DEL_MFC:
 908                        if(optlen!=sizeof(mfc))
 909                                return -EINVAL;
 910                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
 911                                return -EFAULT;
 912                        rtnl_lock();
 913                        if (optname==MRT_DEL_MFC)
 914                                ret = ipmr_mfc_delete(&mfc);
 915                        else
 916                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
 917                        rtnl_unlock();
 918                        return ret;
 919                /*
 920                 *      Control PIM assert.
 921                 */
 922                case MRT_ASSERT:
 923                {
 924                        int v;
 925                        if(get_user(v,(int *)optval))
 926                                return -EFAULT;
 927                        mroute_do_assert=(v)?1:0;
 928                        return 0;
 929                }
 930#ifdef CONFIG_IP_PIMSM
 931                case MRT_PIM:
 932                {
 933                        int v, ret;
 934                        if(get_user(v,(int *)optval))
 935                                return -EFAULT;
 936                        v = (v)?1:0;
 937                        rtnl_lock();
 938                        ret = 0;
 939                        if (v != mroute_do_pim) {
 940                                mroute_do_pim = v;
 941                                mroute_do_assert = v;
 942#ifdef CONFIG_IP_PIMSM_V2
 943                                if (mroute_do_pim)
 944                                        ret = inet_add_protocol(&pim_protocol,
 945                                                                IPPROTO_PIM);
 946                                else
 947                                        ret = inet_del_protocol(&pim_protocol,
 948                                                                IPPROTO_PIM);
 949                                if (ret < 0)
 950                                        ret = -EAGAIN;
 951#endif
 952                        }
 953                        rtnl_unlock();
 954                        return ret;
 955                }
 956#endif
 957                /*
 958                 *      Spurious command, or MRT_VERSION which you cannot
 959                 *      set.
 960                 */
 961                default:
 962                        return -ENOPROTOOPT;
 963        }
 964}
 965
 966/*
 967 *      Getsock opt support for the multicast routing system.
 968 */
 969 
 970int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
 971{
 972        int olr;
 973        int val;
 974
 975        if(optname!=MRT_VERSION && 
 976#ifdef CONFIG_IP_PIMSM
 977           optname!=MRT_PIM &&
 978#endif
 979           optname!=MRT_ASSERT)
 980                return -ENOPROTOOPT;
 981
 982        if (get_user(olr, optlen))
 983                return -EFAULT;
 984
 985        olr = min_t(unsigned int, olr, sizeof(int));
 986        if (olr < 0)
 987                return -EINVAL;
 988                
 989        if(put_user(olr,optlen))
 990                return -EFAULT;
 991        if(optname==MRT_VERSION)
 992                val=0x0305;
 993#ifdef CONFIG_IP_PIMSM
 994        else if(optname==MRT_PIM)
 995                val=mroute_do_pim;
 996#endif
 997        else
 998                val=mroute_do_assert;
 999        if(copy_to_user(optval,&val,olr))
1000                return -EFAULT;
1001        return 0;
1002}
1003
1004/*
1005 *      The IP multicast ioctl support routines.
1006 */
1007 
1008int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1009{
1010        struct sioc_sg_req sr;
1011        struct sioc_vif_req vr;
1012        struct vif_device *vif;
1013        struct mfc_cache *c;
1014        
1015        switch(cmd)
1016        {
1017                case SIOCGETVIFCNT:
1018                        if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1019                                return -EFAULT; 
1020                        if(vr.vifi>=maxvif)
1021                                return -EINVAL;
1022                        read_lock(&mrt_lock);
1023                        vif=&vif_table[vr.vifi];
1024                        if(VIF_EXISTS(vr.vifi)) {
1025                                vr.icount=vif->pkt_in;
1026                                vr.ocount=vif->pkt_out;
1027                                vr.ibytes=vif->bytes_in;
1028                                vr.obytes=vif->bytes_out;
1029                                read_unlock(&mrt_lock);
1030
1031                                if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1032                                        return -EFAULT;
1033                                return 0;
1034                        }
1035                        read_unlock(&mrt_lock);
1036                        return -EADDRNOTAVAIL;
1037                case SIOCGETSGCNT:
1038                        if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1039                                return -EFAULT;
1040
1041                        read_lock(&mrt_lock);
1042                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1043                        if (c) {
1044                                sr.pktcnt = c->mfc_un.res.pkt;
1045                                sr.bytecnt = c->mfc_un.res.bytes;
1046                                sr.wrong_if = c->mfc_un.res.wrong_if;
1047                                read_unlock(&mrt_lock);
1048
1049                                if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1050                                        return -EFAULT;
1051                                return 0;
1052                        }
1053                        read_unlock(&mrt_lock);
1054                        return -EADDRNOTAVAIL;
1055                default:
1056                        return -ENOIOCTLCMD;
1057        }
1058}
1059
1060
1061static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1062{
1063        struct vif_device *v;
1064        int ct;
1065        if (event != NETDEV_UNREGISTER)
1066                return NOTIFY_DONE;
1067        v=&vif_table[0];
1068        for(ct=0;ct<maxvif;ct++,v++) {
1069                if (v->dev==ptr)
1070                        vif_delete(ct);
1071        }
1072        return NOTIFY_DONE;
1073}
1074
1075
1076static struct notifier_block ip_mr_notifier={
1077        .notifier_call = ipmr_device_event,
1078};
1079
1080/*
1081 *      Encapsulate a packet by attaching a valid IPIP header to it.
1082 *      This avoids tunnel drivers and other mess and gives us the speed so
1083 *      important for multicast video.
1084 */
1085 
1086static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1087{
1088        struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1089
1090        iph->version    =       4;
1091        iph->tos        =       skb->nh.iph->tos;
1092        iph->ttl        =       skb->nh.iph->ttl;
1093        iph->frag_off   =       0;
1094        iph->daddr      =       daddr;
1095        iph->saddr      =       saddr;
1096        iph->protocol   =       IPPROTO_IPIP;
1097        iph->ihl        =       5;
1098        iph->tot_len    =       htons(skb->len);
1099        ip_select_ident(iph, skb->dst, NULL);
1100        ip_send_check(iph);
1101
1102        skb->h.ipiph = skb->nh.iph;
1103        skb->nh.iph = iph;
1104        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1105#ifdef CONFIG_NETFILTER
1106        nf_conntrack_put(skb->nfct);
1107        skb->nfct = NULL;
1108#endif
1109}
1110
1111static inline int ipmr_forward_finish(struct sk_buff *skb)
1112{
1113        struct ip_options * opt = &(IPCB(skb)->opt);
1114
1115        IP_INC_STATS_BH(IpForwDatagrams);
1116
1117        if (unlikely(opt->optlen))
1118                ip_forward_options(skb);
1119
1120        return dst_output(skb);
1121}
1122
1123/*
1124 *      Processing handlers for ipmr_forward
1125 */
1126
1127static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1128{
1129        struct iphdr *iph = skb->nh.iph;
1130        struct vif_device *vif = &vif_table[vifi];
1131        struct net_device *dev;
1132        struct rtable *rt;
1133        int    encap = 0;
1134
1135        if (vif->dev == NULL)
1136                goto out_free;
1137
1138#ifdef CONFIG_IP_PIMSM
1139        if (vif->flags & VIFF_REGISTER) {
1140                vif->pkt_out++;
1141                vif->bytes_out+=skb->len;
1142                ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1143                ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1144                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1145                kfree_skb(skb);
1146                return;
1147        }
1148#endif
1149
1150        if (vif->flags&VIFF_TUNNEL) {
1151                struct flowi fl = { .oif = vif->link,
1152                                    .nl_u = { .ip4_u =
1153                                              { .daddr = vif->remote,
1154                                                .saddr = vif->local,
1155                                                .tos = RT_TOS(iph->tos) } },
1156                                    .proto = IPPROTO_IPIP };
1157                if (ip_route_output_key(&rt, &fl))
1158                        goto out_free;
1159                encap = sizeof(struct iphdr);
1160        } else {
1161                struct flowi fl = { .oif = vif->link,
1162                                    .nl_u = { .ip4_u =
1163                                              { .daddr = iph->daddr,
1164                                                .tos = RT_TOS(iph->tos) } },
1165                                    .proto = IPPROTO_IPIP };
1166                if (ip_route_output_key(&rt, &fl))
1167                        goto out_free;
1168        }
1169
1170        dev = rt->u.dst.dev;
1171
1172        if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1173                /* Do not fragment multicasts. Alas, IPv4 does not
1174                   allow to send ICMP, so that packets will disappear
1175                   to blackhole.
1176                 */
1177
1178                IP_INC_STATS_BH(IpFragFails);
1179                ip_rt_put(rt);
1180                goto out_free;
1181        }
1182
1183        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1184
1185        if (skb_cow(skb, encap)) {
1186                ip_rt_put(rt);
1187                goto out_free;
1188        }
1189
1190        vif->pkt_out++;
1191        vif->bytes_out+=skb->len;
1192
1193        dst_release(skb->dst);
1194        skb->dst = &rt->u.dst;
1195        iph = skb->nh.iph;
1196        ip_decrease_ttl(iph);
1197
1198        /* FIXME: forward and output firewalls used to be called here.
1199         * What do we do with netfilter? -- RR */
1200        if (vif->flags & VIFF_TUNNEL) {
1201                ip_encap(skb, vif->local, vif->remote);
1202                /* FIXME: extra output firewall step used to be here. --RR */
1203                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1204                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
1205        }
1206
1207        IPCB(skb)->flags |= IPSKB_FORWARDED;
1208
1209        /*
1210         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1211         * not only before forwarding, but after forwarding on all output
1212         * interfaces. It is clear, if mrouter runs a multicasting
1213         * program, it should receive packets not depending to what interface
1214         * program is joined.
1215         * If we will not make it, the program will have to join on all
1216         * interfaces. On the other hand, multihoming host (or router, but
1217         * not mrouter) cannot join to more than one interface - it will
1218         * result in receiving multiple packets.
1219         */
1220        NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, 
1221                ipmr_forward_finish);
1222        return;
1223
1224out_free:
1225        kfree_skb(skb);
1226        return;
1227}
1228
1229static int ipmr_find_vif(struct net_device *dev)
1230{
1231        int ct;
1232        for (ct=maxvif-1; ct>=0; ct--) {
1233                if (vif_table[ct].dev == dev)
1234                        break;
1235        }
1236        return ct;
1237}
1238
1239/* "local" means that we should preserve one skb (for local delivery) */
1240
1241static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1242{
1243        int psend = -1;
1244        int vif, ct;
1245
1246        vif = cache->mfc_parent;
1247        cache->mfc_un.res.pkt++;
1248        cache->mfc_un.res.bytes += skb->len;
1249
1250        /*
1251         * Wrong interface: drop packet and (maybe) send PIM assert.
1252         */
1253        if (vif_table[vif].dev != skb->dev) {
1254                int true_vifi;
1255
1256                if (((struct rtable*)skb->dst)->fl.iif == 0) {
1257                        /* It is our own packet, looped back.
1258                           Very complicated situation...
1259
1260                           The best workaround until routing daemons will be
1261                           fixed is not to redistribute packet, if it was
1262                           send through wrong interface. It means, that
1263                           multicast applications WILL NOT work for
1264                           (S,G), which have default multicast route pointing
1265                           to wrong oif. In any case, it is not a good
1266                           idea to use multicasting applications on router.
1267                         */
1268                        goto dont_forward;
1269                }
1270
1271                cache->mfc_un.res.wrong_if++;
1272                true_vifi = ipmr_find_vif(skb->dev);
1273
1274                if (true_vifi >= 0 && mroute_do_assert &&
1275                    /* pimsm uses asserts, when switching from RPT to SPT,
1276                       so that we cannot check that packet arrived on an oif.
1277                       It is bad, but otherwise we would need to move pretty
1278                       large chunk of pimd to kernel. Ough... --ANK
1279                     */
1280                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1281                    time_after(jiffies, 
1282                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1283                        cache->mfc_un.res.last_assert = jiffies;
1284                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1285                }
1286                goto dont_forward;
1287        }
1288
1289        vif_table[vif].pkt_in++;
1290        vif_table[vif].bytes_in+=skb->len;
1291
1292        /*
1293         *      Forward the frame
1294         */
1295        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1296                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1297                        if (psend != -1) {
1298                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1299                                if (skb2)
1300                                        ipmr_queue_xmit(skb2, cache, psend);
1301                        }
1302                        psend=ct;
1303                }
1304        }
1305        if (psend != -1) {
1306                if (local) {
1307                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1308                        if (skb2)
1309                                ipmr_queue_xmit(skb2, cache, psend);
1310                } else {
1311                        ipmr_queue_xmit(skb, cache, psend);
1312                        return 0;
1313                }
1314        }
1315
1316dont_forward:
1317        if (!local)
1318                kfree_skb(skb);
1319        return 0;
1320}
1321
1322
1323/*
1324 *      Multicast packets for forwarding arrive here
1325 */
1326
1327int ip_mr_input(struct sk_buff *skb)
1328{
1329        struct mfc_cache *cache;
1330        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1331
1332        /* Packet is looped back after forward, it should not be
1333           forwarded second time, but still can be delivered locally.
1334         */
1335        if (IPCB(skb)->flags&IPSKB_FORWARDED)
1336                goto dont_forward;
1337
1338        if (!local) {
1339                    if (IPCB(skb)->opt.router_alert) {
1340                            if (ip_call_ra_chain(skb))
1341                                    return 0;
1342                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1343                            /* IGMPv1 (and broken IGMPv2 implementations sort of
1344                               Cisco IOS <= 11.2(8)) do not put router alert
1345                               option to IGMP packets destined to routable
1346                               groups. It is very bad, because it means
1347                               that we can forward NO IGMP messages.
1348                             */
1349                            read_lock(&mrt_lock);
1350                            if (mroute_socket) {
1351                                    raw_rcv(mroute_socket, skb);
1352                                    read_unlock(&mrt_lock);
1353                                    return 0;
1354                            }
1355                            read_unlock(&mrt_lock);
1356                    }
1357        }
1358
1359        read_lock(&mrt_lock);
1360        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1361
1362        /*
1363         *      No usable cache entry
1364         */
1365        if (cache==NULL) {
1366                int vif;
1367
1368                if (local) {
1369                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1370                        ip_local_deliver(skb);
1371                        if (skb2 == NULL) {
1372                                read_unlock(&mrt_lock);
1373                                return -ENOBUFS;
1374                        }
1375                        skb = skb2;
1376                }
1377
1378                vif = ipmr_find_vif(skb->dev);
1379                if (vif >= 0) {
1380                        int err = ipmr_cache_unresolved(vif, skb);
1381                        read_unlock(&mrt_lock);
1382
1383                        return err;
1384                }
1385                read_unlock(&mrt_lock);
1386                kfree_skb(skb);
1387                return -ENODEV;
1388        }
1389
1390        ip_mr_forward(skb, cache, local);
1391
1392        read_unlock(&mrt_lock);
1393
1394        if (local)
1395                return ip_local_deliver(skb);
1396
1397        return 0;
1398
1399dont_forward:
1400        if (local)
1401                return ip_local_deliver(skb);
1402        kfree_skb(skb);
1403        return 0;
1404}
1405
1406#ifdef CONFIG_IP_PIMSM_V1
1407/*
1408 * Handle IGMP messages of PIMv1
1409 */
1410
1411int pim_rcv_v1(struct sk_buff * skb)
1412{
1413        struct igmphdr *pim;
1414        struct iphdr   *encap;
1415        struct net_device  *reg_dev = NULL;
1416
1417        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 
1418                goto drop;
1419
1420        pim = (struct igmphdr*)skb->h.raw;
1421
1422        if (!mroute_do_pim ||
1423            skb->len < sizeof(*pim) + sizeof(*encap) ||
1424            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 
1425                goto drop;
1426
1427        encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1428        /*
1429           Check that:
1430           a. packet is really destinted to a multicast group
1431           b. packet is not a NULL-REGISTER
1432           c. packet is not truncated
1433         */
1434        if (!MULTICAST(encap->daddr) ||
1435            encap->tot_len == 0 ||
1436            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
1437                goto drop;
1438
1439        read_lock(&mrt_lock);
1440        if (reg_vif_num >= 0)
1441                reg_dev = vif_table[reg_vif_num].dev;
1442        if (reg_dev)
1443                dev_hold(reg_dev);
1444        read_unlock(&mrt_lock);
1445
1446        if (reg_dev == NULL) 
1447                goto drop;
1448
1449        skb->mac.raw = skb->nh.raw;
1450        skb_pull(skb, (u8*)encap - skb->data);
1451        skb->nh.iph = (struct iphdr *)skb->data;
1452        skb->dev = reg_dev;
1453        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1454        skb->protocol = htons(ETH_P_IP);
1455        skb->ip_summed = 0;
1456        skb->pkt_type = PACKET_HOST;
1457        dst_release(skb->dst);
1458        skb->dst = NULL;
1459        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1460        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1461#ifdef CONFIG_NETFILTER
1462        nf_conntrack_put(skb->nfct);
1463        skb->nfct = NULL;
1464#endif
1465        netif_rx(skb);
1466        dev_put(reg_dev);
1467        return 0;
1468 drop:
1469        kfree_skb(skb);
1470        return 0;
1471}
1472#endif
1473
1474#ifdef CONFIG_IP_PIMSM_V2
1475static int pim_rcv(struct sk_buff * skb)
1476{
1477        struct pimreghdr *pim;
1478        struct iphdr   *encap;
1479        struct net_device  *reg_dev = NULL;
1480
1481        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 
1482                goto drop;
1483
1484        pim = (struct pimreghdr*)skb->h.raw;
1485        if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1486            (pim->flags&PIM_NULL_REGISTER) ||
1487            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 
1488             (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 
1489                goto drop;
1490
1491        /* check if the inner packet is destined to mcast group */
1492        encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1493        if (!MULTICAST(encap->daddr) ||
1494            encap->tot_len == 0 ||
1495            ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
1496                goto drop;
1497
1498        read_lock(&mrt_lock);
1499        if (reg_vif_num >= 0)
1500                reg_dev = vif_table[reg_vif_num].dev;
1501        if (reg_dev)
1502                dev_hold(reg_dev);
1503        read_unlock(&mrt_lock);
1504
1505        if (reg_dev == NULL) 
1506                goto drop;
1507
1508        skb->mac.raw = skb->nh.raw;
1509        skb_pull(skb, (u8*)encap - skb->data);
1510        skb->nh.iph = (struct iphdr *)skb->data;
1511        skb->dev = reg_dev;
1512        memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1513        skb->protocol = htons(ETH_P_IP);
1514        skb->ip_summed = 0;
1515        skb->pkt_type = PACKET_HOST;
1516        dst_release(skb->dst);
1517        ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1518        ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1519        skb->dst = NULL;
1520#ifdef CONFIG_NETFILTER
1521        nf_conntrack_put(skb->nfct);
1522        skb->nfct = NULL;
1523#endif
1524        netif_rx(skb);
1525        dev_put(reg_dev);
1526        return 0;
1527 drop:
1528        kfree_skb(skb);
1529        return 0;
1530}
1531#endif
1532
1533static int
1534ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1535{
1536        int ct;
1537        struct rtnexthop *nhp;
1538        struct net_device *dev = vif_table[c->mfc_parent].dev;
1539        u8 *b = skb->tail;
1540        struct rtattr *mp_head;
1541
1542        if (dev)
1543                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1544
1545        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1546
1547        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1548                if (c->mfc_un.res.ttls[ct] < 255) {
1549                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1550                                goto rtattr_failure;
1551                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1552                        nhp->rtnh_flags = 0;
1553                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1554                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1555                        nhp->rtnh_len = sizeof(*nhp);
1556                }
1557        }
1558        mp_head->rta_type = RTA_MULTIPATH;
1559        mp_head->rta_len = skb->tail - (u8*)mp_head;
1560        rtm->rtm_type = RTN_MULTICAST;
1561        return 1;
1562
1563rtattr_failure:
1564        skb_trim(skb, b - skb->data);
1565        return -EMSGSIZE;
1566}
1567
1568int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1569{
1570        int err;
1571        struct mfc_cache *cache;
1572        struct rtable *rt = (struct rtable*)skb->dst;
1573
1574        read_lock(&mrt_lock);
1575        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1576
1577        if (cache==NULL) {
1578                struct net_device *dev;
1579                int vif;
1580
1581                if (nowait) {
1582                        read_unlock(&mrt_lock);
1583                        return -EAGAIN;
1584                }
1585
1586                dev = skb->dev;
1587                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1588                        read_unlock(&mrt_lock);
1589                        return -ENODEV;
1590                }
1591                skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1592                skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1593                skb->nh.iph->saddr = rt->rt_src;
1594                skb->nh.iph->daddr = rt->rt_dst;
1595                skb->nh.iph->version = 0;
1596                err = ipmr_cache_unresolved(vif, skb);
1597                read_unlock(&mrt_lock);
1598                return err;
1599        }
1600
1601        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1602                cache->mfc_flags |= MFC_NOTIFY;
1603        err = ipmr_fill_mroute(skb, cache, rtm);
1604        read_unlock(&mrt_lock);
1605        return err;
1606}
1607
1608#ifdef CONFIG_PROC_FS   
1609/*
1610 *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1611 */
/* Iterator state for the vif seq_file; stored in seq->private. */
struct ipmr_vif_iter {
	int ct;		/* vif_table index of the current entry */
};
1615
1616static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1617                                           loff_t pos)
1618{
1619        for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1620                if(!VIF_EXISTS(iter->ct))
1621                        continue;
1622                if (pos-- == 0) 
1623                        return &vif_table[iter->ct];
1624        }
1625        return NULL;
1626}
1627
1628static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1629{
1630        read_lock(&mrt_lock);
1631        return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 
1632                : SEQ_START_TOKEN;
1633}
1634
1635static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1636{
1637        struct ipmr_vif_iter *iter = seq->private;
1638
1639        ++*pos;
1640        if (v == SEQ_START_TOKEN)
1641                return ipmr_vif_seq_idx(iter, 0);
1642        
1643        while (++iter->ct < maxvif) {
1644                if(!VIF_EXISTS(iter->ct))
1645                        continue;
1646                return &vif_table[iter->ct];
1647        }
1648        return NULL;
1649}
1650
1651static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1652{
1653        read_unlock(&mrt_lock);
1654}
1655
1656static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1657{
1658        if (v == SEQ_START_TOKEN) {
1659                seq_puts(seq, 
1660                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1661        } else {
1662                const struct vif_device *vif = v;
1663                const char *name =  vif->dev ? vif->dev->name : "none";
1664
1665                seq_printf(seq,
1666                           "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1667                           vif - vif_table,
1668                           name, vif->bytes_in, vif->pkt_in, 
1669                           vif->bytes_out, vif->pkt_out,
1670                           vif->flags, vif->local, vif->remote);
1671        }
1672        return 0;
1673}
1674
1675static struct seq_operations ipmr_vif_seq_ops = {
1676        .start = ipmr_vif_seq_start,
1677        .next  = ipmr_vif_seq_next,
1678        .stop  = ipmr_vif_seq_stop,
1679        .show  = ipmr_vif_seq_show,
1680};
1681
1682static int ipmr_vif_open(struct inode *inode, struct file *file)
1683{
1684        struct seq_file *seq;
1685        int rc = -ENOMEM;
1686        struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1687       
1688        if (!s)
1689                goto out;
1690
1691        rc = seq_open(file, &ipmr_vif_seq_ops);
1692        if (rc)
1693                goto out_kfree;
1694
1695        s->ct = 0;
1696        seq = file->private_data;
1697        seq->private = s;
1698out:
1699        return rc;
1700out_kfree:
1701        kfree(s);
1702        goto out;
1703
1704}
1705
1706static struct file_operations ipmr_vif_fops = {
1707        .owner   = THIS_MODULE,
1708        .open    = ipmr_vif_open,
1709        .read    = seq_read,
1710        .llseek  = seq_lseek,
1711        .release = seq_release,
1712};
1713
/*
 * Iterator state for the forwarding-cache seq_file.
 * "cache" names the list being walked: mfc_cache_array (mrt_lock held),
 * &mfc_unres_queue (mfc_unres_lock held) or NULL (no lock held).
 */
struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;		/* current mfc_cache_array line */
};
1718
1719
1720static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1721{
1722        struct mfc_cache *mfc;
1723
1724        it->cache = mfc_cache_array;
1725        read_lock(&mrt_lock);
1726        for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 
1727                for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 
1728                        if (pos-- == 0) 
1729                                return mfc;
1730        read_unlock(&mrt_lock);
1731
1732        it->cache = &mfc_unres_queue;
1733        spin_lock_bh(&mfc_unres_lock);
1734        for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) 
1735                if (pos-- == 0)
1736                        return mfc;
1737        spin_unlock_bh(&mfc_unres_lock);
1738
1739        it->cache = NULL;
1740        return NULL;
1741}
1742
1743
1744static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1745{
1746        return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 
1747                : SEQ_START_TOKEN;
1748}
1749
1750static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1751{
1752        struct mfc_cache *mfc = v;
1753        struct ipmr_mfc_iter *it = seq->private;
1754
1755        ++*pos;
1756
1757        if (v == SEQ_START_TOKEN)
1758                return ipmr_mfc_seq_idx(seq->private, 0);
1759
1760        if (mfc->next)
1761                return mfc->next;
1762        
1763        if (it->cache == &mfc_unres_queue) 
1764                goto end_of_list;
1765
1766        BUG_ON(it->cache != mfc_cache_array);
1767
1768        while (++it->ct < MFC_LINES) {
1769                mfc = mfc_cache_array[it->ct];
1770                if (mfc)
1771                        return mfc;
1772        }
1773
1774        /* exhausted cache_array, show unresolved */
1775        read_unlock(&mrt_lock);
1776        it->cache = &mfc_unres_queue;
1777        it->ct = 0;
1778                
1779        spin_lock_bh(&mfc_unres_lock);
1780        mfc = mfc_unres_queue;
1781        if (mfc) 
1782                return mfc;
1783
1784 end_of_list:
1785        spin_unlock_bh(&mfc_unres_lock);
1786        it->cache = NULL;
1787
1788        return NULL;
1789}
1790
1791static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1792{
1793        struct ipmr_mfc_iter *it = seq->private;
1794
1795        if (it->cache == &mfc_unres_queue)
1796                spin_unlock_bh(&mfc_unres_lock);
1797        else if (it->cache == mfc_cache_array)
1798                read_unlock(&mrt_lock);
1799}
1800
1801static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1802{
1803        int n;
1804
1805        if (v == SEQ_START_TOKEN) {
1806                seq_puts(seq, 
1807                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1808        } else {
1809                const struct mfc_cache *mfc = v;
1810                const struct ipmr_mfc_iter *it = seq->private;
1811                
1812                seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1813                           (unsigned long) mfc->mfc_mcastgrp,
1814                           (unsigned long) mfc->mfc_origin,
1815                           mfc->mfc_parent,
1816                           mfc->mfc_un.res.pkt,
1817                           mfc->mfc_un.res.bytes,
1818                           mfc->mfc_un.res.wrong_if);
1819
1820                if (it->cache != &mfc_unres_queue) {
1821                        for(n = mfc->mfc_un.res.minvif; 
1822                            n < mfc->mfc_un.res.maxvif; n++ ) {
1823                                if(VIF_EXISTS(n) 
1824                                   && mfc->mfc_un.res.ttls[n] < 255)
1825                                seq_printf(seq, 
1826                                           " %2d:%-3d", 
1827                                           n, mfc->mfc_un.res.ttls[n]);
1828                        }
1829                }
1830                seq_putc(seq, '\n');
1831        }
1832        return 0;
1833}
1834
1835static struct seq_operations ipmr_mfc_seq_ops = {
1836        .start = ipmr_mfc_seq_start,
1837        .next  = ipmr_mfc_seq_next,
1838        .stop  = ipmr_mfc_seq_stop,
1839        .show  = ipmr_mfc_seq_show,
1840};
1841
1842static int ipmr_mfc_open(struct inode *inode, struct file *file)
1843{
1844        struct seq_file *seq;
1845        int rc = -ENOMEM;
1846        struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1847       
1848        if (!s)
1849                goto out;
1850
1851        rc = seq_open(file, &ipmr_mfc_seq_ops);
1852        if (rc)
1853                goto out_kfree;
1854
1855        memset(s, 0, sizeof(*s));
1856        seq = file->private_data;
1857        seq->private = s;
1858out:
1859        return rc;
1860out_kfree:
1861        kfree(s);
1862        goto out;
1863
1864}
1865
1866static struct file_operations ipmr_mfc_fops = {
1867        .owner   = THIS_MODULE,
1868        .open    = ipmr_mfc_open,
1869        .read    = seq_read,
1870        .llseek  = seq_lseek,
1871        .release = seq_release,
1872};
1873#endif  
1874
#ifdef CONFIG_IP_PIMSM_V2
/* Protocol descriptor whose receive handler is pim_rcv(). */
static struct inet_protocol pim_protocol = {
        .handler = pim_rcv,
};
#endif
1880
1881
1882/*
1883 *      Setup for IP multicast routing
1884 */
1885 
1886void __init ip_mr_init(void)
1887{
1888        mrt_cachep = kmem_cache_create("ip_mrt_cache",
1889                                       sizeof(struct mfc_cache),
1890                                       0, SLAB_HWCACHE_ALIGN,
1891                                       NULL, NULL);
1892        init_timer(&ipmr_expire_timer);
1893        ipmr_expire_timer.function=ipmr_expire_process;
1894        register_netdevice_notifier(&ip_mr_notifier);
1895#ifdef CONFIG_PROC_FS   
1896        proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1897        proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1898#endif  
1899}
1900
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.