linux-bk/net/ipv4/ipmr.c
<<
>>
Prefs
   1/*
   2 *      IP multicast routing support for mrouted 3.6/3.8
   3 *
   4 *              (c) 1995 Alan Cox, <alan@redhat.com>
   5 *        Linux Consultancy and Custom Driver Development
   6 *
   7 *      This program is free software; you can redistribute it and/or
   8 *      modify it under the terms of the GNU General Public License
   9 *      as published by the Free Software Foundation; either version
  10 *      2 of the License, or (at your option) any later version.
  11 *
  12 *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
  13 *
  14 *      Fixes:
  15 *      Michael Chastain        :       Incorrect size of copying.
  16 *      Alan Cox                :       Added the cache manager code
  17 *      Alan Cox                :       Fixed the clone/copy bug and device race.
  18 *      Mike McLagan            :       Routing by source
  19 *      Malcolm Beattie         :       Buffer handling fixes.
  20 *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  21 *      SVR Anand               :       Fixed several multicast bugs and problems.
  22 *      Alexey Kuznetsov        :       Status, optimisations and more.
  23 *      Brad Parker             :       Better behaviour on mrouted upcall
  24 *                                      overflow.
  25 *      Carlos Picoto           :       PIMv1 Support
  26 *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
   27 *                                      Relax this requirement to work with older peers.
  28 *
  29 */
  30
  31#include <linux/config.h>
  32#include <asm/system.h>
  33#include <asm/uaccess.h>
  34#include <linux/types.h>
  35#include <linux/sched.h>
  36#include <linux/errno.h>
  37#include <linux/timer.h>
  38#include <linux/mm.h>
  39#include <linux/kernel.h>
  40#include <linux/fcntl.h>
  41#include <linux/stat.h>
  42#include <linux/socket.h>
  43#include <linux/in.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/inetdevice.h>
  47#include <linux/igmp.h>
  48#include <linux/proc_fs.h>
  49#include <linux/seq_file.h>
  50#include <linux/mroute.h>
  51#include <linux/init.h>
  52#include <net/ip.h>
  53#include <net/protocol.h>
  54#include <linux/skbuff.h>
  55#include <net/sock.h>
  56#include <net/icmp.h>
  57#include <net/udp.h>
  58#include <net/raw.h>
  59#include <linux/notifier.h>
  60#include <linux/if_arp.h>
  61#include <linux/netfilter_ipv4.h>
  62#include <net/ipip.h>
  63#include <net/checksum.h>
  64
  65#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  66#define CONFIG_IP_PIMSM 1
  67#endif
  68
  69static struct sock *mroute_socket;
  70
  71
  72/* Big lock, protecting vif table, mrt cache and mroute socket state.
  73   Note that the changes are semaphored via rtnl_lock.
  74 */
  75
  76static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
  77
  78/*
  79 *      Multicast router control variables
  80 */
  81
  82static struct vif_device vif_table[MAXVIFS];            /* Devices              */
  83static int maxvif;
  84
  85#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
  86
  87static int mroute_do_assert;                            /* Set in PIM assert    */
  88static int mroute_do_pim;
  89
  90static struct mfc_cache *mfc_cache_array[MFC_LINES];    /* Forwarding cache     */
  91
  92static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
  93static atomic_t cache_resolve_queue_len;                /* Size of unresolved   */
  94
  95/* Special spinlock for queue of unresolved entries */
  96static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
  97
  98/* We return to original Alan's scheme. Hash table of resolved
  99   entries is changed only in process context and protected
 100   with weak lock mrt_lock. Queue of unresolved entries is protected
 101   with strong spinlock mfc_unres_lock.
 102
 103   In this case data path is free of exclusive locks at all.
 104 */
 105
 106static kmem_cache_t *mrt_cachep;
 107
 108static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
 109static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
 110static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 111
 112#ifdef CONFIG_IP_PIMSM_V2
 113static struct net_protocol pim_protocol;
 114#endif
 115
 116static struct timer_list ipmr_expire_timer;
 117
 118/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 119
 120static
 121struct net_device *ipmr_new_tunnel(struct vifctl *v)
 122{
 123        struct net_device  *dev;
 124
 125        dev = __dev_get_by_name("tunl0");
 126
 127        if (dev) {
 128                int err;
 129                struct ifreq ifr;
 130                mm_segment_t    oldfs;
 131                struct ip_tunnel_parm p;
 132                struct in_device  *in_dev;
 133
 134                memset(&p, 0, sizeof(p));
 135                p.iph.daddr = v->vifc_rmt_addr.s_addr;
 136                p.iph.saddr = v->vifc_lcl_addr.s_addr;
 137                p.iph.version = 4;
 138                p.iph.ihl = 5;
 139                p.iph.protocol = IPPROTO_IPIP;
 140                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 141                ifr.ifr_ifru.ifru_data = (void*)&p;
 142
 143                oldfs = get_fs(); set_fs(KERNEL_DS);
 144                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 145                set_fs(oldfs);
 146
 147                dev = NULL;
 148
 149                if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
 150                        dev->flags |= IFF_MULTICAST;
 151
 152                        in_dev = __in_dev_get(dev);
 153                        if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
 154                                goto failure;
 155                        in_dev->cnf.rp_filter = 0;
 156
 157                        if (dev_open(dev))
 158                                goto failure;
 159                }
 160        }
 161        return dev;
 162
 163failure:
 164        /* allow the register to be completed before unregistering. */
 165        rtnl_unlock();
 166        rtnl_lock();
 167
 168        unregister_netdevice(dev);
 169        return NULL;
 170}
 171
 172#ifdef CONFIG_IP_PIMSM
 173
 174static int reg_vif_num = -1;
 175
 176static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 177{
 178        read_lock(&mrt_lock);
 179        ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
 180        ((struct net_device_stats*)dev->priv)->tx_packets++;
 181        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 182        read_unlock(&mrt_lock);
 183        kfree_skb(skb);
 184        return 0;
 185}
 186
 187static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
 188{
 189        return (struct net_device_stats*)dev->priv;
 190}
 191
 192static void reg_vif_setup(struct net_device *dev)
 193{
 194        dev->type               = ARPHRD_PIMREG;
 195        dev->mtu                = 1500 - sizeof(struct iphdr) - 8;
 196        dev->flags              = IFF_NOARP;
 197        dev->hard_start_xmit    = reg_vif_xmit;
 198        dev->get_stats          = reg_vif_get_stats;
 199        dev->destructor         = free_netdev;
 200}
 201
 202static struct net_device *ipmr_reg_vif(void)
 203{
 204        struct net_device *dev;
 205        struct in_device *in_dev;
 206
 207        dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
 208                           reg_vif_setup);
 209
 210        if (dev == NULL)
 211                return NULL;
 212
 213        if (register_netdevice(dev)) {
 214                free_netdev(dev);
 215                return NULL;
 216        }
 217        dev->iflink = 0;
 218
 219        if ((in_dev = inetdev_init(dev)) == NULL)
 220                goto failure;
 221
 222        in_dev->cnf.rp_filter = 0;
 223
 224        if (dev_open(dev))
 225                goto failure;
 226
 227        return dev;
 228
 229failure:
 230        /* allow the register to be completed before unregistering. */
 231        rtnl_unlock();
 232        rtnl_lock();
 233
 234        unregister_netdevice(dev);
 235        return NULL;
 236}
 237#endif
 238
 239/*
 240 *      Delete a VIF entry
 241 */
 242 
 243static int vif_delete(int vifi)
 244{
 245        struct vif_device *v;
 246        struct net_device *dev;
 247        struct in_device *in_dev;
 248
 249        if (vifi < 0 || vifi >= maxvif)
 250                return -EADDRNOTAVAIL;
 251
 252        v = &vif_table[vifi];
 253
 254        write_lock_bh(&mrt_lock);
 255        dev = v->dev;
 256        v->dev = NULL;
 257
 258        if (!dev) {
 259                write_unlock_bh(&mrt_lock);
 260                return -EADDRNOTAVAIL;
 261        }
 262
 263#ifdef CONFIG_IP_PIMSM
 264        if (vifi == reg_vif_num)
 265                reg_vif_num = -1;
 266#endif
 267
 268        if (vifi+1 == maxvif) {
 269                int tmp;
 270                for (tmp=vifi-1; tmp>=0; tmp--) {
 271                        if (VIF_EXISTS(tmp))
 272                                break;
 273                }
 274                maxvif = tmp+1;
 275        }
 276
 277        write_unlock_bh(&mrt_lock);
 278
 279        dev_set_allmulti(dev, -1);
 280
 281        if ((in_dev = __in_dev_get(dev)) != NULL) {
 282                in_dev->cnf.mc_forwarding--;
 283                ip_rt_multicast_event(in_dev);
 284        }
 285
 286        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 287                unregister_netdevice(dev);
 288
 289        dev_put(dev);
 290        return 0;
 291}
 292
 293/* Destroy an unresolved cache entry, killing queued skbs
 294   and reporting error to netlink readers.
 295 */
 296
 297static void ipmr_destroy_unres(struct mfc_cache *c)
 298{
 299        struct sk_buff *skb;
 300
 301        atomic_dec(&cache_resolve_queue_len);
 302
 303        while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
 304                if (skb->nh.iph->version == 0) {
 305                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 306                        nlh->nlmsg_type = NLMSG_ERROR;
 307                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 308                        skb_trim(skb, nlh->nlmsg_len);
 309                        ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
 310                        netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
 311                } else
 312                        kfree_skb(skb);
 313        }
 314
 315        kmem_cache_free(mrt_cachep, c);
 316}
 317
 318
 319/* Single timer process for all the unresolved queue. */
 320
 321static void ipmr_expire_process(unsigned long dummy)
 322{
 323        unsigned long now;
 324        unsigned long expires;
 325        struct mfc_cache *c, **cp;
 326
 327        if (!spin_trylock(&mfc_unres_lock)) {
 328                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 329                return;
 330        }
 331
 332        if (atomic_read(&cache_resolve_queue_len) == 0)
 333                goto out;
 334
 335        now = jiffies;
 336        expires = 10*HZ;
 337        cp = &mfc_unres_queue;
 338
 339        while ((c=*cp) != NULL) {
 340                if (time_after(c->mfc_un.unres.expires, now)) {
 341                        unsigned long interval = c->mfc_un.unres.expires - now;
 342                        if (interval < expires)
 343                                expires = interval;
 344                        cp = &c->next;
 345                        continue;
 346                }
 347
 348                *cp = c->next;
 349
 350                ipmr_destroy_unres(c);
 351        }
 352
 353        if (atomic_read(&cache_resolve_queue_len))
 354                mod_timer(&ipmr_expire_timer, jiffies + expires);
 355
 356out:
 357        spin_unlock(&mfc_unres_lock);
 358}
 359
 360/* Fill oifs list. It is called under write locked mrt_lock. */
 361
 362static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
 363{
 364        int vifi;
 365
 366        cache->mfc_un.res.minvif = MAXVIFS;
 367        cache->mfc_un.res.maxvif = 0;
 368        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 369
 370        for (vifi=0; vifi<maxvif; vifi++) {
 371                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
 372                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 373                        if (cache->mfc_un.res.minvif > vifi)
 374                                cache->mfc_un.res.minvif = vifi;
 375                        if (cache->mfc_un.res.maxvif <= vifi)
 376                                cache->mfc_un.res.maxvif = vifi + 1;
 377                }
 378        }
 379}
 380
 381static int vif_add(struct vifctl *vifc, int mrtsock)
 382{
 383        int vifi = vifc->vifc_vifi;
 384        struct vif_device *v = &vif_table[vifi];
 385        struct net_device *dev;
 386        struct in_device *in_dev;
 387
 388        /* Is vif busy ? */
 389        if (VIF_EXISTS(vifi))
 390                return -EADDRINUSE;
 391
 392        switch (vifc->vifc_flags) {
 393#ifdef CONFIG_IP_PIMSM
 394        case VIFF_REGISTER:
 395                /*
 396                 * Special Purpose VIF in PIM
 397                 * All the packets will be sent to the daemon
 398                 */
 399                if (reg_vif_num >= 0)
 400                        return -EADDRINUSE;
 401                dev = ipmr_reg_vif();
 402                if (!dev)
 403                        return -ENOBUFS;
 404                break;
 405#endif
 406        case VIFF_TUNNEL:       
 407                dev = ipmr_new_tunnel(vifc);
 408                if (!dev)
 409                        return -ENOBUFS;
 410                break;
 411        case 0:
 412                dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
 413                if (!dev)
 414                        return -EADDRNOTAVAIL;
 415                __dev_put(dev);
 416                break;
 417        default:
 418                return -EINVAL;
 419        }
 420
 421        if ((in_dev = __in_dev_get(dev)) == NULL)
 422                return -EADDRNOTAVAIL;
 423        in_dev->cnf.mc_forwarding++;
 424        dev_set_allmulti(dev, +1);
 425        ip_rt_multicast_event(in_dev);
 426
 427        /*
 428         *      Fill in the VIF structures
 429         */
 430        v->rate_limit=vifc->vifc_rate_limit;
 431        v->local=vifc->vifc_lcl_addr.s_addr;
 432        v->remote=vifc->vifc_rmt_addr.s_addr;
 433        v->flags=vifc->vifc_flags;
 434        if (!mrtsock)
 435                v->flags |= VIFF_STATIC;
 436        v->threshold=vifc->vifc_threshold;
 437        v->bytes_in = 0;
 438        v->bytes_out = 0;
 439        v->pkt_in = 0;
 440        v->pkt_out = 0;
 441        v->link = dev->ifindex;
 442        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 443                v->link = dev->iflink;
 444
 445        /* And finish update writing critical data */
 446        write_lock_bh(&mrt_lock);
 447        dev_hold(dev);
 448        v->dev=dev;
 449#ifdef CONFIG_IP_PIMSM
 450        if (v->flags&VIFF_REGISTER)
 451                reg_vif_num = vifi;
 452#endif
 453        if (vifi+1 > maxvif)
 454                maxvif = vifi+1;
 455        write_unlock_bh(&mrt_lock);
 456        return 0;
 457}
 458
 459static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
 460{
 461        int line=MFC_HASH(mcastgrp,origin);
 462        struct mfc_cache *c;
 463
 464        for (c=mfc_cache_array[line]; c; c = c->next) {
 465                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 466                        break;
 467        }
 468        return c;
 469}
 470
 471/*
 472 *      Allocate a multicast cache entry
 473 */
 474static struct mfc_cache *ipmr_cache_alloc(void)
 475{
 476        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
 477        if(c==NULL)
 478                return NULL;
 479        memset(c, 0, sizeof(*c));
 480        c->mfc_un.res.minvif = MAXVIFS;
 481        return c;
 482}
 483
 484static struct mfc_cache *ipmr_cache_alloc_unres(void)
 485{
 486        struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
 487        if(c==NULL)
 488                return NULL;
 489        memset(c, 0, sizeof(*c));
 490        skb_queue_head_init(&c->mfc_un.unres.unresolved);
 491        c->mfc_un.unres.expires = jiffies + 10*HZ;
 492        return c;
 493}
 494
 495/*
 496 *      A cache entry has gone into a resolved state from queued
 497 */
 498 
 499static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 500{
 501        struct sk_buff *skb;
 502
 503        /*
 504         *      Play the pending entries through our router
 505         */
 506
 507        while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 508                if (skb->nh.iph->version == 0) {
 509                        int err;
 510                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 511
 512                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 513                                nlh->nlmsg_len = skb->tail - (u8*)nlh;
 514                        } else {
 515                                nlh->nlmsg_type = NLMSG_ERROR;
 516                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 517                                skb_trim(skb, nlh->nlmsg_len);
 518                                ((struct nlmsgerr*)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 519                        }
 520                        err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
 521                } else
 522                        ip_mr_forward(skb, c, 0);
 523        }
 524}
 525
 526/*
 527 *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 528 *      expects the following bizarre scheme.
 529 *
 530 *      Called under mrt_lock.
 531 */
 532 
 533static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 534{
 535        struct sk_buff *skb;
 536        int ihl = pkt->nh.iph->ihl<<2;
 537        struct igmphdr *igmp;
 538        struct igmpmsg *msg;
 539        int ret;
 540
 541#ifdef CONFIG_IP_PIMSM
 542        if (assert == IGMPMSG_WHOLEPKT)
 543                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 544        else
 545#endif
 546                skb = alloc_skb(128, GFP_ATOMIC);
 547
 548        if(!skb)
 549                return -ENOBUFS;
 550
 551#ifdef CONFIG_IP_PIMSM
 552        if (assert == IGMPMSG_WHOLEPKT) {
 553                /* Ugly, but we have no choice with this interface.
 554                   Duplicate old header, fix ihl, length etc.
 555                   And all this only to mangle msg->im_msgtype and
 556                   to set msg->im_mbz to "mbz" :-)
 557                 */
 558                msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
 559                skb->nh.raw = skb->h.raw = (u8*)msg;
 560                memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
 561                msg->im_msgtype = IGMPMSG_WHOLEPKT;
 562                msg->im_mbz = 0;
 563                msg->im_vif = reg_vif_num;
 564                skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
 565                skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
 566        } else 
 567#endif
 568        {       
 569                
 570        /*
 571         *      Copy the IP header
 572         */
 573
 574        skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
 575        memcpy(skb->data,pkt->data,ihl);
 576        skb->nh.iph->protocol = 0;                      /* Flag to the kernel this is a route add */
 577        msg = (struct igmpmsg*)skb->nh.iph;
 578        msg->im_vif = vifi;
 579        skb->dst = dst_clone(pkt->dst);
 580
 581        /*
 582         *      Add our header
 583         */
 584
 585        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
 586        igmp->type      =
 587        msg->im_msgtype = assert;
 588        igmp->code      =       0;
 589        skb->nh.iph->tot_len=htons(skb->len);                   /* Fix the length */
 590        skb->h.raw = skb->nh.raw;
 591        }
 592
 593        if (mroute_socket == NULL) {
 594                kfree_skb(skb);
 595                return -EINVAL;
 596        }
 597
 598        /*
 599         *      Deliver to mrouted
 600         */
 601        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
 602                if (net_ratelimit())
 603                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 604                kfree_skb(skb);
 605        }
 606
 607        return ret;
 608}
 609
 610/*
 611 *      Queue a packet for resolution. It gets locked cache entry!
 612 */
 613 
 614static int
 615ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 616{
 617        int err;
 618        struct mfc_cache *c;
 619
 620        spin_lock_bh(&mfc_unres_lock);
 621        for (c=mfc_unres_queue; c; c=c->next) {
 622                if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
 623                    c->mfc_origin == skb->nh.iph->saddr)
 624                        break;
 625        }
 626
 627        if (c == NULL) {
 628                /*
 629                 *      Create a new entry if allowable
 630                 */
 631
 632                if (atomic_read(&cache_resolve_queue_len)>=10 ||
 633                    (c=ipmr_cache_alloc_unres())==NULL) {
 634                        spin_unlock_bh(&mfc_unres_lock);
 635
 636                        kfree_skb(skb);
 637                        return -ENOBUFS;
 638                }
 639
 640                /*
 641                 *      Fill in the new cache entry
 642                 */
 643                c->mfc_parent=-1;
 644                c->mfc_origin=skb->nh.iph->saddr;
 645                c->mfc_mcastgrp=skb->nh.iph->daddr;
 646
 647                /*
 648                 *      Reflect first query at mrouted.
 649                 */
 650                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
 651                        /* If the report failed throw the cache entry 
 652                           out - Brad Parker
 653                         */
 654                        spin_unlock_bh(&mfc_unres_lock);
 655
 656                        kmem_cache_free(mrt_cachep, c);
 657                        kfree_skb(skb);
 658                        return err;
 659                }
 660
 661                atomic_inc(&cache_resolve_queue_len);
 662                c->next = mfc_unres_queue;
 663                mfc_unres_queue = c;
 664
 665                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 666        }
 667
 668        /*
 669         *      See if we can append the packet
 670         */
 671        if (c->mfc_un.unres.unresolved.qlen>3) {
 672                kfree_skb(skb);
 673                err = -ENOBUFS;
 674        } else {
 675                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
 676                err = 0;
 677        }
 678
 679        spin_unlock_bh(&mfc_unres_lock);
 680        return err;
 681}
 682
 683/*
 684 *      MFC cache manipulation by user space mroute daemon
 685 */
 686
 687static int ipmr_mfc_delete(struct mfcctl *mfc)
 688{
 689        int line;
 690        struct mfc_cache *c, **cp;
 691
 692        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 693
 694        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 695                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 696                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 697                        write_lock_bh(&mrt_lock);
 698                        *cp = c->next;
 699                        write_unlock_bh(&mrt_lock);
 700
 701                        kmem_cache_free(mrt_cachep, c);
 702                        return 0;
 703                }
 704        }
 705        return -ENOENT;
 706}
 707
 708static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 709{
 710        int line;
 711        struct mfc_cache *uc, *c, **cp;
 712
 713        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 714
 715        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 716                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 717                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 718                        break;
 719        }
 720
 721        if (c != NULL) {
 722                write_lock_bh(&mrt_lock);
 723                c->mfc_parent = mfc->mfcc_parent;
 724                ipmr_update_threshoulds(c, mfc->mfcc_ttls);
 725                if (!mrtsock)
 726                        c->mfc_flags |= MFC_STATIC;
 727                write_unlock_bh(&mrt_lock);
 728                return 0;
 729        }
 730
 731        if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
 732                return -EINVAL;
 733
 734        c=ipmr_cache_alloc();
 735        if (c==NULL)
 736                return -ENOMEM;
 737
 738        c->mfc_origin=mfc->mfcc_origin.s_addr;
 739        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
 740        c->mfc_parent=mfc->mfcc_parent;
 741        ipmr_update_threshoulds(c, mfc->mfcc_ttls);
 742        if (!mrtsock)
 743                c->mfc_flags |= MFC_STATIC;
 744
 745        write_lock_bh(&mrt_lock);
 746        c->next = mfc_cache_array[line];
 747        mfc_cache_array[line] = c;
 748        write_unlock_bh(&mrt_lock);
 749
 750        /*
 751         *      Check to see if we resolved a queued list. If so we
 752         *      need to send on the frames and tidy up.
 753         */
 754        spin_lock_bh(&mfc_unres_lock);
 755        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 756             cp = &uc->next) {
 757                if (uc->mfc_origin == c->mfc_origin &&
 758                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 759                        *cp = uc->next;
 760                        if (atomic_dec_and_test(&cache_resolve_queue_len))
 761                                del_timer(&ipmr_expire_timer);
 762                        break;
 763                }
 764        }
 765        spin_unlock_bh(&mfc_unres_lock);
 766
 767        if (uc) {
 768                ipmr_cache_resolve(uc, c);
 769                kmem_cache_free(mrt_cachep, uc);
 770        }
 771        return 0;
 772}
 773
 774/*
 775 *      Close the multicast socket, and clear the vif tables etc
 776 */
 777 
 778static void mroute_clean_tables(struct sock *sk)
 779{
 780        int i;
 781                
 782        /*
 783         *      Shut down all active vif entries
 784         */
 785        for(i=0; i<maxvif; i++) {
 786                if (!(vif_table[i].flags&VIFF_STATIC))
 787                        vif_delete(i);
 788        }
 789
 790        /*
 791         *      Wipe the cache
 792         */
 793        for (i=0;i<MFC_LINES;i++) {
 794                struct mfc_cache *c, **cp;
 795
 796                cp = &mfc_cache_array[i];
 797                while ((c = *cp) != NULL) {
 798                        if (c->mfc_flags&MFC_STATIC) {
 799                                cp = &c->next;
 800                                continue;
 801                        }
 802                        write_lock_bh(&mrt_lock);
 803                        *cp = c->next;
 804                        write_unlock_bh(&mrt_lock);
 805
 806                        kmem_cache_free(mrt_cachep, c);
 807                }
 808        }
 809
 810        if (atomic_read(&cache_resolve_queue_len) != 0) {
 811                struct mfc_cache *c;
 812
 813                spin_lock_bh(&mfc_unres_lock);
 814                while (mfc_unres_queue != NULL) {
 815                        c = mfc_unres_queue;
 816                        mfc_unres_queue = c->next;
 817                        spin_unlock_bh(&mfc_unres_lock);
 818
 819                        ipmr_destroy_unres(c);
 820
 821                        spin_lock_bh(&mfc_unres_lock);
 822                }
 823                spin_unlock_bh(&mfc_unres_lock);
 824        }
 825}
 826
 827static void mrtsock_destruct(struct sock *sk)
 828{
 829        rtnl_lock();
 830        if (sk == mroute_socket) {
 831                ipv4_devconf.mc_forwarding--;
 832
 833                write_lock_bh(&mrt_lock);
 834                mroute_socket=NULL;
 835                write_unlock_bh(&mrt_lock);
 836
 837                mroute_clean_tables(sk);
 838        }
 839        rtnl_unlock();
 840}
 841
 842/*
 843 *      Socket options and virtual interface manipulation. The whole
 844 *      virtual interface system is a complete heap, but unfortunately
 845 *      that's how BSD mrouted happens to think. Maybe one day with a proper
 846 *      MOSPF/PIM router set up we can clean this up.
 847 */
 848 
 849int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
 850{
 851        int ret;
 852        struct vifctl vif;
 853        struct mfcctl mfc;
 854        
 855        if(optname!=MRT_INIT)
 856        {
 857                if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
 858                        return -EACCES;
 859        }
 860
 861        switch(optname)
 862        {
 863                case MRT_INIT:
 864                        if (sk->sk_type != SOCK_RAW ||
 865                            inet_sk(sk)->num != IPPROTO_IGMP)
 866                                return -EOPNOTSUPP;
 867                        if(optlen!=sizeof(int))
 868                                return -ENOPROTOOPT;
 869
 870                        rtnl_lock();
 871                        if (mroute_socket) {
 872                                rtnl_unlock();
 873                                return -EADDRINUSE;
 874                        }
 875
 876                        ret = ip_ra_control(sk, 1, mrtsock_destruct);
 877                        if (ret == 0) {
 878                                write_lock_bh(&mrt_lock);
 879                                mroute_socket=sk;
 880                                write_unlock_bh(&mrt_lock);
 881
 882                                ipv4_devconf.mc_forwarding++;
 883                        }
 884                        rtnl_unlock();
 885                        return ret;
 886                case MRT_DONE:
 887                        if (sk!=mroute_socket)
 888                                return -EACCES;
 889                        return ip_ra_control(sk, 0, NULL);
 890                case MRT_ADD_VIF:
 891                case MRT_DEL_VIF:
 892                        if(optlen!=sizeof(vif))
 893                                return -EINVAL;
 894                        if (copy_from_user(&vif,optval,sizeof(vif)))
 895                                return -EFAULT; 
 896                        if(vif.vifc_vifi >= MAXVIFS)
 897                                return -ENFILE;
 898                        rtnl_lock();
 899                        if (optname==MRT_ADD_VIF) {
 900                                ret = vif_add(&vif, sk==mroute_socket);
 901                        } else {
 902                                ret = vif_delete(vif.vifc_vifi);
 903                        }
 904                        rtnl_unlock();
 905                        return ret;
 906
 907                /*
 908                 *      Manipulate the forwarding caches. These live
 909                 *      in a sort of kernel/user symbiosis.
 910                 */
 911                case MRT_ADD_MFC:
 912                case MRT_DEL_MFC:
 913                        if(optlen!=sizeof(mfc))
 914                                return -EINVAL;
 915                        if (copy_from_user(&mfc,optval, sizeof(mfc)))
 916                                return -EFAULT;
 917                        rtnl_lock();
 918                        if (optname==MRT_DEL_MFC)
 919                                ret = ipmr_mfc_delete(&mfc);
 920                        else
 921                                ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
 922                        rtnl_unlock();
 923                        return ret;
 924                /*
 925                 *      Control PIM assert.
 926                 */
 927                case MRT_ASSERT:
 928                {
 929                        int v;
 930                        if(get_user(v,(int __user *)optval))
 931                                return -EFAULT;
 932                        mroute_do_assert=(v)?1:0;
 933                        return 0;
 934                }
 935#ifdef CONFIG_IP_PIMSM
 936                case MRT_PIM:
 937                {
 938                        int v, ret;
 939                        if(get_user(v,(int __user *)optval))
 940                                return -EFAULT;
 941                        v = (v)?1:0;
 942                        rtnl_lock();
 943                        ret = 0;
 944                        if (v != mroute_do_pim) {
 945                                mroute_do_pim = v;
 946                                mroute_do_assert = v;
 947#ifdef CONFIG_IP_PIMSM_V2
 948                                if (mroute_do_pim)
 949                                        ret = inet_add_protocol(&pim_protocol,
 950                                                                IPPROTO_PIM);
 951                                else
 952                                        ret = inet_del_protocol(&pim_protocol,
 953                                                                IPPROTO_PIM);
 954                                if (ret < 0)
 955                                        ret = -EAGAIN;
 956#endif
 957                        }
 958                        rtnl_unlock();
 959                        return ret;
 960                }
 961#endif
 962                /*
 963                 *      Spurious command, or MRT_VERSION which you cannot
 964                 *      set.
 965                 */
 966                default:
 967                        return -ENOPROTOOPT;
 968        }
 969}
 970
 971/*
 972 *      Getsock opt support for the multicast routing system.
 973 */
 974 
 975int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
 976{
 977        int olr;
 978        int val;
 979
 980        if(optname!=MRT_VERSION && 
 981#ifdef CONFIG_IP_PIMSM
 982           optname!=MRT_PIM &&
 983#endif
 984           optname!=MRT_ASSERT)
 985                return -ENOPROTOOPT;
 986
 987        if (get_user(olr, optlen))
 988                return -EFAULT;
 989
 990        olr = min_t(unsigned int, olr, sizeof(int));
 991        if (olr < 0)
 992                return -EINVAL;
 993                
 994        if(put_user(olr,optlen))
 995                return -EFAULT;
 996        if(optname==MRT_VERSION)
 997                val=0x0305;
 998#ifdef CONFIG_IP_PIMSM
 999        else if(optname==MRT_PIM)
1000                val=mroute_do_pim;
1001#endif
1002        else
1003                val=mroute_do_assert;
1004        if(copy_to_user(optval,&val,olr))
1005                return -EFAULT;
1006        return 0;
1007}
1008
1009/*
1010 *      The IP multicast ioctl support routines.
1011 */
1012 
1013int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1014{
1015        struct sioc_sg_req sr;
1016        struct sioc_vif_req vr;
1017        struct vif_device *vif;
1018        struct mfc_cache *c;
1019        
1020        switch(cmd)
1021        {
1022                case SIOCGETVIFCNT:
1023                        if (copy_from_user(&vr,arg,sizeof(vr)))
1024                                return -EFAULT; 
1025                        if(vr.vifi>=maxvif)
1026                                return -EINVAL;
1027                        read_lock(&mrt_lock);
1028                        vif=&vif_table[vr.vifi];
1029                        if(VIF_EXISTS(vr.vifi)) {
1030                                vr.icount=vif->pkt_in;
1031                                vr.ocount=vif->pkt_out;
1032                                vr.ibytes=vif->bytes_in;
1033                                vr.obytes=vif->bytes_out;
1034                                read_unlock(&mrt_lock);
1035
1036                                if (copy_to_user(arg,&vr,sizeof(vr)))
1037                                        return -EFAULT;
1038                                return 0;
1039                        }
1040                        read_unlock(&mrt_lock);
1041                        return -EADDRNOTAVAIL;
1042                case SIOCGETSGCNT:
1043                        if (copy_from_user(&sr,arg,sizeof(sr)))
1044                                return -EFAULT;
1045
1046                        read_lock(&mrt_lock);
1047                        c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1048                        if (c) {
1049                                sr.pktcnt = c->mfc_un.res.pkt;
1050                                sr.bytecnt = c->mfc_un.res.bytes;
1051                                sr.wrong_if = c->mfc_un.res.wrong_if;
1052                                read_unlock(&mrt_lock);
1053
1054                                if (copy_to_user(arg,&sr,sizeof(sr)))
1055                                        return -EFAULT;
1056                                return 0;
1057                        }
1058                        read_unlock(&mrt_lock);
1059                        return -EADDRNOTAVAIL;
1060                default:
1061                        return -ENOIOCTLCMD;
1062        }
1063}
1064
1065
1066static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1067{
1068        struct vif_device *v;
1069        int ct;
1070        if (event != NETDEV_UNREGISTER)
1071                return NOTIFY_DONE;
1072        v=&vif_table[0];
1073        for(ct=0;ct<maxvif;ct++,v++) {
1074                if (v->dev==ptr)
1075                        vif_delete(ct);
1076        }
1077        return NOTIFY_DONE;
1078}
1079
1080
1081static struct notifier_block ip_mr_notifier={
1082        .notifier_call = ipmr_device_event,
1083};
1084
1085/*
1086 *      Encapsulate a packet by attaching a valid IPIP header to it.
1087 *      This avoids tunnel drivers and other mess and gives us the speed so
1088 *      important for multicast video.
1089 */
1090 
1091static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1092{
1093        struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1094
1095        iph->version    =       4;
1096        iph->tos        =       skb->nh.iph->tos;
1097        iph->ttl        =       skb->nh.iph->ttl;
1098        iph->frag_off   =       0;
1099        iph->daddr      =       daddr;
1100        iph->saddr      =       saddr;
1101        iph->protocol   =       IPPROTO_IPIP;
1102        iph->ihl        =       5;
1103        iph->tot_len    =       htons(skb->len);
1104        ip_select_ident(iph, skb->dst, NULL);
1105        ip_send_check(iph);
1106
1107        skb->h.ipiph = skb->nh.iph;
1108        skb->nh.iph = iph;
1109        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1110        nf_reset(skb);
1111}
1112
1113static inline int ipmr_forward_finish(struct sk_buff *skb)
1114{
1115        struct ip_options * opt = &(IPCB(skb)->opt);
1116
1117        IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
1118
1119        if (unlikely(opt->optlen))
1120                ip_forward_options(skb);
1121
1122        return dst_output(skb);
1123}
1124
1125/*
1126 *      Processing handlers for ipmr_forward
1127 */
1128
1129static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1130{
1131        struct iphdr *iph = skb->nh.iph;
1132        struct vif_device *vif = &vif_table[vifi];
1133        struct net_device *dev;
1134        struct rtable *rt;
1135        int    encap = 0;
1136
1137        if (vif->dev == NULL)
1138                goto out_free;
1139
1140#ifdef CONFIG_IP_PIMSM
1141        if (vif->flags & VIFF_REGISTER) {
1142                vif->pkt_out++;
1143                vif->bytes_out+=skb->len;
1144                ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1145                ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1146                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1147                kfree_skb(skb);
1148                return;
1149        }
1150#endif
1151
1152        if (vif->flags&VIFF_TUNNEL) {
1153                struct flowi fl = { .oif = vif->link,
1154                                    .nl_u = { .ip4_u =
1155                                              { .daddr = vif->remote,
1156                                                .saddr = vif->local,
1157                                                .tos = RT_TOS(iph->tos) } },
1158                                    .proto = IPPROTO_IPIP };
1159                if (ip_route_output_key(&rt, &fl))
1160                        goto out_free;
1161                encap = sizeof(struct iphdr);
1162        } else {
1163                struct flowi fl = { .oif = vif->link,
1164                                    .nl_u = { .ip4_u =
1165                                              { .daddr = iph->daddr,
1166                                                .tos = RT_TOS(iph->tos) } },
1167                                    .proto = IPPROTO_IPIP };
1168                if (ip_route_output_key(&rt, &fl))
1169                        goto out_free;
1170        }
1171
1172        dev = rt->u.dst.dev;
1173
1174        if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1175                /* Do not fragment multicasts. Alas, IPv4 does not
1176                   allow to send ICMP, so that packets will disappear
1177                   to blackhole.
1178                 */
1179
1180                IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
1181                ip_rt_put(rt);
1182                goto out_free;
1183        }
1184
1185        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1186
1187        if (skb_cow(skb, encap)) {
1188                ip_rt_put(rt);
1189                goto out_free;
1190        }
1191
1192        vif->pkt_out++;
1193        vif->bytes_out+=skb->len;
1194
1195        dst_release(skb->dst);
1196        skb->dst = &rt->u.dst;
1197        iph = skb->nh.iph;
1198        ip_decrease_ttl(iph);
1199
1200        /* FIXME: forward and output firewalls used to be called here.
1201         * What do we do with netfilter? -- RR */
1202        if (vif->flags & VIFF_TUNNEL) {
1203                ip_encap(skb, vif->local, vif->remote);
1204                /* FIXME: extra output firewall step used to be here. --RR */
1205                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1206                ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb->len;
1207        }
1208
1209        IPCB(skb)->flags |= IPSKB_FORWARDED;
1210
1211        /*
1212         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1213         * not only before forwarding, but after forwarding on all output
1214         * interfaces. It is clear, if mrouter runs a multicasting
1215         * program, it should receive packets not depending to what interface
1216         * program is joined.
1217         * If we will not make it, the program will have to join on all
1218         * interfaces. On the other hand, multihoming host (or router, but
1219         * not mrouter) cannot join to more than one interface - it will
1220         * result in receiving multiple packets.
1221         */
1222        NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev, 
1223                ipmr_forward_finish);
1224        return;
1225
1226out_free:
1227        kfree_skb(skb);
1228        return;
1229}
1230
1231static int ipmr_find_vif(struct net_device *dev)
1232{
1233        int ct;
1234        for (ct=maxvif-1; ct>=0; ct--) {
1235                if (vif_table[ct].dev == dev)
1236                        break;
1237        }
1238        return ct;
1239}
1240
1241/* "local" means that we should preserve one skb (for local delivery) */
1242
1243static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1244{
1245        int psend = -1;
1246        int vif, ct;
1247
1248        vif = cache->mfc_parent;
1249        cache->mfc_un.res.pkt++;
1250        cache->mfc_un.res.bytes += skb->len;
1251
1252        /*
1253         * Wrong interface: drop packet and (maybe) send PIM assert.
1254         */
1255        if (vif_table[vif].dev != skb->dev) {
1256                int true_vifi;
1257
1258                if (((struct rtable*)skb->dst)->fl.iif == 0) {
1259                        /* It is our own packet, looped back.
1260                           Very complicated situation...
1261
1262                           The best workaround until routing daemons will be
1263                           fixed is not to redistribute packet, if it was
1264                           send through wrong interface. It means, that
1265                           multicast applications WILL NOT work for
1266                           (S,G), which have default multicast route pointing
1267                           to wrong oif. In any case, it is not a good
1268                           idea to use multicasting applications on router.
1269                         */
1270                        goto dont_forward;
1271                }
1272
1273                cache->mfc_un.res.wrong_if++;
1274                true_vifi = ipmr_find_vif(skb->dev);
1275
1276                if (true_vifi >= 0 && mroute_do_assert &&
1277                    /* pimsm uses asserts, when switching from RPT to SPT,
1278                       so that we cannot check that packet arrived on an oif.
1279                       It is bad, but otherwise we would need to move pretty
1280                       large chunk of pimd to kernel. Ough... --ANK
1281                     */
1282                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1283                    time_after(jiffies, 
1284                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1285                        cache->mfc_un.res.last_assert = jiffies;
1286                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1287                }
1288                goto dont_forward;
1289        }
1290
1291        vif_table[vif].pkt_in++;
1292        vif_table[vif].bytes_in+=skb->len;
1293
1294        /*
1295         *      Forward the frame
1296         */
1297        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1298                if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1299                        if (psend != -1) {
1300                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1301                                if (skb2)
1302                                        ipmr_queue_xmit(skb2, cache, psend);
1303                        }
1304                        psend=ct;
1305                }
1306        }
1307        if (psend != -1) {
1308                if (local) {
1309                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1310                        if (skb2)
1311                                ipmr_queue_xmit(skb2, cache, psend);
1312                } else {
1313                        ipmr_queue_xmit(skb, cache, psend);
1314                        return 0;
1315                }
1316        }
1317
1318dont_forward:
1319        if (!local)
1320                kfree_skb(skb);
1321        return 0;
1322}
1323
1324
1325/*
1326 *      Multicast packets for forwarding arrive here
1327 */
1328
1329int ip_mr_input(struct sk_buff *skb)
1330{
1331        struct mfc_cache *cache;
1332        int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1333
1334        /* Packet is looped back after forward, it should not be
1335           forwarded second time, but still can be delivered locally.
1336         */
1337        if (IPCB(skb)->flags&IPSKB_FORWARDED)
1338                goto dont_forward;
1339
1340        if (!local) {
1341                    if (IPCB(skb)->opt.router_alert) {
1342                            if (ip_call_ra_chain(skb))
1343                                    return 0;
1344                    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1345                            /* IGMPv1 (and broken IGMPv2 implementations sort of
1346                               Cisco IOS <= 11.2(8)) do not put router alert
1347                               option to IGMP packets destined to routable
1348                               groups. It is very bad, because it means
1349                               that we can forward NO IGMP messages.
1350                             */
1351                            read_lock(&mrt_lock);
1352                            if (mroute_socket) {
1353                                    raw_rcv(mroute_socket, skb);
1354                                    read_unlock(&mrt_lock);
1355                                    return 0;
1356                            }
1357                            read_unlock(&mrt_lock);
1358                    }
1359        }
1360
1361        read_lock(&mrt_lock);
1362        cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1363
1364        /*
1365         *      No usable cache entry
1366         */
1367        if (cache==NULL) {
1368                int vif;
1369
1370                if (local) {
1371                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1372                        ip_local_deliver(skb);
1373                        if (skb2 == NULL) {
1374                                read_unlock(&mrt_lock);
1375                                return -ENOBUFS;
1376                        }
1377                        skb = skb2;
1378                }
1379
1380                vif = ipmr_find_vif(skb->dev);
1381                if (vif >= 0) {
1382                        int err = ipmr_cache_unresolved(vif, skb);
1383                        read_unlock(&mrt_lock);
1384
1385                        return err;
1386                }
1387                read_unlock(&mrt_lock);
1388                kfree_skb(skb);
1389                return -ENODEV;
1390        }
1391
1392        ip_mr_forward(skb, cache, local);
1393
1394        read_unlock(&mrt_lock);
1395
1396        if (local)
1397                return ip_local_deliver(skb);
1398
1399        return 0;
1400
1401dont_forward:
1402        if (local)
1403                return ip_local_deliver(skb);
1404        kfree_skb(skb);
1405        return 0;
1406}
1407
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1.
 *
 * A valid PIMv1 REGISTER has its outer headers stripped and the inner
 * packet is re-injected with netif_rx() as if it had been received on the
 * register vif's device.  Anything else is dropped.  Always returns 0.
 */

int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *hdr;
	struct iphdr *inner;
	struct net_device *reg_dev = NULL;
	struct net_device_stats *stats;

	if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*inner)))
		goto drop;

	hdr = (struct igmphdr *)skb->h.raw;

	if (!mroute_do_pim)
		goto drop;
	if (skb->len < sizeof(*hdr) + sizeof(*inner))
		goto drop;
	if (hdr->group != PIM_V1_VERSION || hdr->code != PIM_V1_REGISTER)
		goto drop;

	inner = (struct iphdr *)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(inner->daddr))
		goto drop;
	if (inner->tot_len == 0)
		goto drop;
	if (ntohs(inner->tot_len) + sizeof(*hdr) > skb->len)
		goto drop;

	/* Pin the register device while mrt_lock protects vif_table */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip everything in front of the inner IP header */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8 *)inner - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;

	stats = (struct net_device_stats *)reg_dev->priv;
	stats->rx_bytes += skb->len;
	stats->rx_packets++;

	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif
1472
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Receive handler for PIMv2 packets.
 *
 * A non-NULL REGISTER with an acceptable checksum has its outer headers
 * stripped and the inner packet is re-injected with netif_rx() as if it
 * had been received on the register vif's device.  Anything else is
 * dropped.  Always returns 0.
 */
static int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *reg;
	struct iphdr *inner;
	struct net_device *reg_dev = NULL;
	struct net_device_stats *stats;

	if (!pskb_may_pull(skb, sizeof(*reg) + sizeof(*inner)))
		goto drop;

	reg = (struct pimreghdr *)skb->h.raw;
	if (reg->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (reg->flags & PIM_NULL_REGISTER))
		goto drop;

	/*
	 * Accept the packet when the checksum over the PIM header alone
	 * verifies; otherwise fall back to a checksum over the whole
	 * packet and drop only if that one fails as well.
	 */
	if (ip_compute_csum((void *)reg, sizeof(*reg)) != 0 &&
	    (u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
		goto drop;

	/* check if the inner packet is destined to mcast group */
	inner = (struct iphdr *)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(inner->daddr))
		goto drop;
	if (inner->tot_len == 0)
		goto drop;
	if (ntohs(inner->tot_len) + sizeof(*reg) > skb->len)
		goto drop;

	/* Pin the register device while mrt_lock protects vif_table */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		goto drop;

	/* Strip everything in front of the inner IP header */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8 *)inner - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);

	stats = (struct net_device_stats *)reg_dev->priv;
	stats->rx_bytes += skb->len;
	stats->rx_packets++;

	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
 drop:
	kfree_skb(skb);
	return 0;
}
#endif
1528
1529static int
1530ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1531{
1532        int ct;
1533        struct rtnexthop *nhp;
1534        struct net_device *dev = vif_table[c->mfc_parent].dev;
1535        u8 *b = skb->tail;
1536        struct rtattr *mp_head;
1537
1538        if (dev)
1539                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1540
1541        mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1542
1543        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1544                if (c->mfc_un.res.ttls[ct] < 255) {
1545                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1546                                goto rtattr_failure;
1547                        nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1548                        nhp->rtnh_flags = 0;
1549                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1550                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1551                        nhp->rtnh_len = sizeof(*nhp);
1552                }
1553        }
1554        mp_head->rta_type = RTA_MULTIPATH;
1555        mp_head->rta_len = skb->tail - (u8*)mp_head;
1556        rtm->rtm_type = RTN_MULTICAST;
1557        return 1;
1558
1559rtattr_failure:
1560        skb_trim(skb, b - skb->data);
1561        return -EMSGSIZE;
1562}
1563
1564int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1565{
1566        int err;
1567        struct mfc_cache *cache;
1568        struct rtable *rt = (struct rtable*)skb->dst;
1569
1570        read_lock(&mrt_lock);
1571        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1572
1573        if (cache==NULL) {
1574                struct net_device *dev;
1575                int vif;
1576
1577                if (nowait) {
1578                        read_unlock(&mrt_lock);
1579                        return -EAGAIN;
1580                }
1581
1582                dev = skb->dev;
1583                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1584                        read_unlock(&mrt_lock);
1585                        return -ENODEV;
1586                }
1587                skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1588                skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1589                skb->nh.iph->saddr = rt->rt_src;
1590                skb->nh.iph->daddr = rt->rt_dst;
1591                skb->nh.iph->version = 0;
1592                err = ipmr_cache_unresolved(vif, skb);
1593                read_unlock(&mrt_lock);
1594                return err;
1595        }
1596
1597        if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1598                cache->mfc_flags |= MFC_NOTIFY;
1599        err = ipmr_fill_mroute(skb, cache, rtm);
1600        read_unlock(&mrt_lock);
1601        return err;
1602}
1603
1604#ifdef CONFIG_PROC_FS   
1605/*
1606 *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1607 */
/* Per-open iteration state of the vif seq_file. */
struct ipmr_vif_iter {
	int ct;		/* index into vif_table of the current entry */
};
1611
1612static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1613                                           loff_t pos)
1614{
1615        for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1616                if(!VIF_EXISTS(iter->ct))
1617                        continue;
1618                if (pos-- == 0) 
1619                        return &vif_table[iter->ct];
1620        }
1621        return NULL;
1622}
1623
1624static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1625{
1626        read_lock(&mrt_lock);
1627        return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 
1628                : SEQ_START_TOKEN;
1629}
1630
1631static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1632{
1633        struct ipmr_vif_iter *iter = seq->private;
1634
1635        ++*pos;
1636        if (v == SEQ_START_TOKEN)
1637                return ipmr_vif_seq_idx(iter, 0);
1638        
1639        while (++iter->ct < maxvif) {
1640                if(!VIF_EXISTS(iter->ct))
1641                        continue;
1642                return &vif_table[iter->ct];
1643        }
1644        return NULL;
1645}
1646
1647static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1648{
1649        read_unlock(&mrt_lock);
1650}
1651
1652static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1653{
1654        if (v == SEQ_START_TOKEN) {
1655                seq_puts(seq, 
1656                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1657        } else {
1658                const struct vif_device *vif = v;
1659                const char *name =  vif->dev ? vif->dev->name : "none";
1660
1661                seq_printf(seq,
1662                           "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1663                           vif - vif_table,
1664                           name, vif->bytes_in, vif->pkt_in, 
1665                           vif->bytes_out, vif->pkt_out,
1666                           vif->flags, vif->local, vif->remote);
1667        }
1668        return 0;
1669}
1670
1671static struct seq_operations ipmr_vif_seq_ops = {
1672        .start = ipmr_vif_seq_start,
1673        .next  = ipmr_vif_seq_next,
1674        .stop  = ipmr_vif_seq_stop,
1675        .show  = ipmr_vif_seq_show,
1676};
1677
1678static int ipmr_vif_open(struct inode *inode, struct file *file)
1679{
1680        struct seq_file *seq;
1681        int rc = -ENOMEM;
1682        struct ipmr_vif_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1683       
1684        if (!s)
1685                goto out;
1686
1687        rc = seq_open(file, &ipmr_vif_seq_ops);
1688        if (rc)
1689                goto out_kfree;
1690
1691        s->ct = 0;
1692        seq = file->private_data;
1693        seq->private = s;
1694out:
1695        return rc;
1696out_kfree:
1697        kfree(s);
1698        goto out;
1699
1700}
1701
1702static struct file_operations ipmr_vif_fops = {
1703        .owner   = THIS_MODULE,
1704        .open    = ipmr_vif_open,
1705        .read    = seq_read,
1706        .llseek  = seq_lseek,
1707        .release = seq_release_private,
1708};
1709
/*
 * Per-open iteration state of the mfc cache seq_file.
 *
 * cache tells which list is being walked and therefore which lock is
 * currently held: mfc_cache_array (mrt_lock held for reading),
 * &mfc_unres_queue (mfc_unres_lock held), or NULL (no lock held).
 */
struct ipmr_mfc_iter {
	struct mfc_cache **cache;
	int ct;		/* current hash line while walking mfc_cache_array */
};
1714
1715
1716static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1717{
1718        struct mfc_cache *mfc;
1719
1720        it->cache = mfc_cache_array;
1721        read_lock(&mrt_lock);
1722        for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 
1723                for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 
1724                        if (pos-- == 0) 
1725                                return mfc;
1726        read_unlock(&mrt_lock);
1727
1728        it->cache = &mfc_unres_queue;
1729        spin_lock_bh(&mfc_unres_lock);
1730        for(mfc = mfc_unres_queue; mfc; mfc = mfc->next) 
1731                if (pos-- == 0)
1732                        return mfc;
1733        spin_unlock_bh(&mfc_unres_lock);
1734
1735        it->cache = NULL;
1736        return NULL;
1737}
1738
1739
1740static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1741{
1742        struct ipmr_mfc_iter *it = seq->private;
1743        it->cache = NULL;
1744        it->ct = 0;
1745        return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 
1746                : SEQ_START_TOKEN;
1747}
1748
1749static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1750{
1751        struct mfc_cache *mfc = v;
1752        struct ipmr_mfc_iter *it = seq->private;
1753
1754        ++*pos;
1755
1756        if (v == SEQ_START_TOKEN)
1757                return ipmr_mfc_seq_idx(seq->private, 0);
1758
1759        if (mfc->next)
1760                return mfc->next;
1761        
1762        if (it->cache == &mfc_unres_queue) 
1763                goto end_of_list;
1764
1765        BUG_ON(it->cache != mfc_cache_array);
1766
1767        while (++it->ct < MFC_LINES) {
1768                mfc = mfc_cache_array[it->ct];
1769                if (mfc)
1770                        return mfc;
1771        }
1772
1773        /* exhausted cache_array, show unresolved */
1774        read_unlock(&mrt_lock);
1775        it->cache = &mfc_unres_queue;
1776        it->ct = 0;
1777                
1778        spin_lock_bh(&mfc_unres_lock);
1779        mfc = mfc_unres_queue;
1780        if (mfc) 
1781                return mfc;
1782
1783 end_of_list:
1784        spin_unlock_bh(&mfc_unres_lock);
1785        it->cache = NULL;
1786
1787        return NULL;
1788}
1789
1790static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1791{
1792        struct ipmr_mfc_iter *it = seq->private;
1793
1794        if (it->cache == &mfc_unres_queue)
1795                spin_unlock_bh(&mfc_unres_lock);
1796        else if (it->cache == mfc_cache_array)
1797                read_unlock(&mrt_lock);
1798}
1799
1800static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1801{
1802        int n;
1803
1804        if (v == SEQ_START_TOKEN) {
1805                seq_puts(seq, 
1806                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1807        } else {
1808                const struct mfc_cache *mfc = v;
1809                const struct ipmr_mfc_iter *it = seq->private;
1810                
1811                seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1812                           (unsigned long) mfc->mfc_mcastgrp,
1813                           (unsigned long) mfc->mfc_origin,
1814                           mfc->mfc_parent,
1815                           mfc->mfc_un.res.pkt,
1816                           mfc->mfc_un.res.bytes,
1817                           mfc->mfc_un.res.wrong_if);
1818
1819                if (it->cache != &mfc_unres_queue) {
1820                        for(n = mfc->mfc_un.res.minvif; 
1821                            n < mfc->mfc_un.res.maxvif; n++ ) {
1822                                if(VIF_EXISTS(n) 
1823                                   && mfc->mfc_un.res.ttls[n] < 255)
1824                                seq_printf(seq, 
1825                                           " %2d:%-3d", 
1826                                           n, mfc->mfc_un.res.ttls[n]);
1827                        }
1828                }
1829                seq_putc(seq, '\n');
1830        }
1831        return 0;
1832}
1833
1834static struct seq_operations ipmr_mfc_seq_ops = {
1835        .start = ipmr_mfc_seq_start,
1836        .next  = ipmr_mfc_seq_next,
1837        .stop  = ipmr_mfc_seq_stop,
1838        .show  = ipmr_mfc_seq_show,
1839};
1840
1841static int ipmr_mfc_open(struct inode *inode, struct file *file)
1842{
1843        struct seq_file *seq;
1844        int rc = -ENOMEM;
1845        struct ipmr_mfc_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
1846       
1847        if (!s)
1848                goto out;
1849
1850        rc = seq_open(file, &ipmr_mfc_seq_ops);
1851        if (rc)
1852                goto out_kfree;
1853
1854        seq = file->private_data;
1855        seq->private = s;
1856out:
1857        return rc;
1858out_kfree:
1859        kfree(s);
1860        goto out;
1861
1862}
1863
1864static struct file_operations ipmr_mfc_fops = {
1865        .owner   = THIS_MODULE,
1866        .open    = ipmr_mfc_open,
1867        .read    = seq_read,
1868        .llseek  = seq_lseek,
1869        .release = seq_release_private,
1870};
1871#endif  
1872
#ifdef CONFIG_IP_PIMSM_V2
/*
 *      Protocol descriptor whose receive handler is pim_rcv().
 */
static struct net_protocol pim_protocol = {
        .handler = pim_rcv,
};
#endif
1878
1879
1880/*
1881 *      Setup for IP multicast routing
1882 */
1883 
1884void __init ip_mr_init(void)
1885{
1886        mrt_cachep = kmem_cache_create("ip_mrt_cache",
1887                                       sizeof(struct mfc_cache),
1888                                       0, SLAB_HWCACHE_ALIGN,
1889                                       NULL, NULL);
1890        if (!mrt_cachep)
1891                panic("cannot allocate ip_mrt_cache");
1892
1893        init_timer(&ipmr_expire_timer);
1894        ipmr_expire_timer.function=ipmr_expire_process;
1895        register_netdevice_notifier(&ip_mr_notifier);
1896#ifdef CONFIG_PROC_FS   
1897        proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
1898        proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
1899#endif  
1900}
1901
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.