linux-bk/drivers/net/bonding.c
<<
>>
Prefs
   1/*
   2 * originally based on the dummy device.
   3 *
   4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.  
   5 * Licensed under the GPL. Based on dummy.c, and eql.c devices.
   6 *
   7 * bonding.c: an Ethernet Bonding driver
   8 *
   9 * This is useful to talk to a Cisco EtherChannel compatible equipment:
  10 *      Cisco 5500
  11 *      Sun Trunking (Solaris)
  12 *      Alteon AceDirector Trunks
  13 *      Linux Bonding
  14 *      and probably many L2 switches ...
  15 *
  16 * How it works:
  17 *    ifconfig bond0 ipaddress netmask up
  18 *      will setup a network device, with an ip address.  No mac address 
  19 *      will be assigned at this time.  The hw mac address will come from 
  20 *      the first slave bonded to the channel.  All slaves will then use 
  21 *      this hw mac address.
  22 *
  23 *    ifconfig bond0 down
  24 *         will release all slaves, marking them as down.
  25 *
  26 *    ifenslave bond0 eth0
  27 *      will attach eth0 to bond0 as a slave.  eth0 hw mac address will either
  28 *      a: be used as initial mac address
  29 *      b: if a hw mac address already is there, eth0's hw mac address 
  30 *         will then be set from bond0.
  31 *
  32 * v0.1 - first working version.
  33 * v0.2 - changed stats to be calculated by summing slaves stats.
  34 *
  35 * Changes:
  36 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
  37 * - fix leaks on failure at bond_init
  38 *
  39 * 2000/09/30 - Willy Tarreau <willy at meta-x.org>
  40 *     - added trivial code to release a slave device.
  41 *     - fixed security bug (CAP_NET_ADMIN not checked)
  42 *     - implemented MII link monitoring to disable dead links :
  43 *       All MII capable slaves are checked every <miimon> milliseconds
  44 *       (100 ms seems good). This value can be changed by passing it to
  45 *       insmod. A value of zero disables the monitoring (default).
  46 *     - fixed an infinite loop in bond_xmit_roundrobin() when there's no
  47 *       good slave.
  48 *     - made the code hopefully SMP safe
  49 *
  50 * 2000/10/03 - Willy Tarreau <willy at meta-x.org>
  51 *     - optimized slave lists based on relevant suggestions from Thomas Davis
  52 *     - implemented active-backup method to obtain HA with two switches:
  53 *       stay as long as possible on the same active interface, while we
  54 *       also monitor the backup one (MII link status) because we want to know
  55 *       if we are able to switch at any time. ( pass "mode=1" to insmod )
  56 *     - lots of stress testings because we need it to be more robust than the
  57 *       wires ! :->
  58 *
  59 * 2000/10/09 - Willy Tarreau <willy at meta-x.org>
  60 *     - added up and down delays after link state change.
  61 *     - optimized the slaves chaining so that when we run forward, we never
  62 *       repass through the bond itself, but we can find it by searching
  63 *       backwards. Renders the deletion more difficult, but accelerates the
  64 *       scan.
  65 *     - smarter enslaving and releasing.
  66 *     - finer and more robust SMP locking
  67 *
  68 * 2000/10/17 - Willy Tarreau <willy at meta-x.org>
  69 *     - fixed two potential SMP race conditions
  70 *
  71 * 2000/10/18 - Willy Tarreau <willy at meta-x.org>
  72 *     - small fixes to the monitoring FSM in case of zero delays
  73 * 2000/11/01 - Willy Tarreau <willy at meta-x.org>
  74 *     - fixed first slave not automatically used in trunk mode.
  75 * 2000/11/10 : spelling of "EtherChannel" corrected.
  76 * 2000/11/13 : fixed a race condition in case of concurrent accesses to ioctl().
  77 * 2000/12/16 : fixed improper usage of rtnl_exlock_nowait().
  78 *
  79 * 2001/1/3 - Chad N. Tindel <ctindel at ieee dot org>
  80 *     - The bonding driver now simulates MII status monitoring, just like
  81 *       a normal network device.  It will show that the link is down iff
  82 *       every slave in the bond shows that their links are down.  If at least
  83 *       one slave is up, the bond's MII status will appear as up.
  84 *
  85 * 2001/2/7 - Chad N. Tindel <ctindel at ieee dot org>
  86 *     - Applications can now query the bond from user space to get
  87 *       information which may be useful.  They do this by calling
  88 *       the BOND_INFO_QUERY ioctl.  Once the app knows how many slaves
  89 *       are in the bond, it can call the BOND_SLAVE_INFO_QUERY ioctl to
  90 *       get slave specific information (# link failures, etc).  See
  91 *       <linux/if_bonding.h> for more details.  The structs of interest
  92 *       are ifbond and ifslave.
  93 *
  94 * 2001/4/5 - Chad N. Tindel <ctindel at ieee dot org>
  95 *     - Ported to 2.4 Kernel
  96 * 
  97 * 2001/5/2 - Jeffrey E. Mast <jeff at mastfamily dot com>
  98 *     - When a device is detached from a bond, the slave device is no longer
   99 *       left thinking that it has a master.
 100 *
 101 * 2001/5/16 - Jeffrey E. Mast <jeff at mastfamily dot com>
  102 *     - memset did not appropriately initialize the bond rw_locks. Used 
 103 *       rwlock_init to initialize to unlocked state to prevent deadlock when 
 104 *       first attempting a lock
 105 *     - Called SET_MODULE_OWNER for bond device
 106 *
 107 * 2001/5/17 - Tim Anderson <tsa at mvista.com>
 108 *     - 2 paths for releasing for slave release; 1 through ioctl
 109 *       and 2) through close. Both paths need to release the same way.
 110 *     - the free slave in bond release is changing slave status before
 111 *       the free. The netdev_set_master() is intended to change slave state
 112 *       so it should not be done as part of the release process.
 113 *     - Simple rule for slave state at release: only the active in A/B and
 114 *       only one in the trunked case.
 115 *
 116 * 2001/6/01 - Tim Anderson <tsa at mvista.com>
 117 *     - Now call dev_close when releasing a slave so it doesn't screw up
  118 *       our routing table.
 119 *
 120 * 2001/6/01 - Chad N. Tindel <ctindel at ieee dot org>
 121 *     - Added /proc support for getting bond and slave information.
 122 *       Information is in /proc/net/<bond device>/info. 
 123 *     - Changed the locking when calling bond_close to prevent deadlock.
 124 *
 125 * 2001/8/05 - Janice Girouard <girouard at us.ibm.com>
 126 *     - correct problem where refcnt of slave is not incremented in bond_ioctl
 127 *       so the system hangs when halting.
 128 *     - correct locking problem when unable to malloc in bond_enslave.
 129 *     - adding bond_xmit_xor logic.
 130 *     - adding multiple bond device support.
 131 *
 132 * 2001/8/13 - Erik Habbinga <erik_habbinga at hp dot com>
 133 *     - correct locking problem with rtnl_exlock_nowait
 134 *
 135 * 2001/8/23 - Janice Girouard <girouard at us.ibm.com>
 136 *     - bzero initial dev_bonds, to correct oops
 137 *     - convert SIOCDEVPRIVATE to new MII ioctl calls
 138 *
 139 * 2001/9/13 - Takao Indoh <indou dot takao at jp dot fujitsu dot com>
 140 *     - Add the BOND_CHANGE_ACTIVE ioctl implementation
 141 *
 142 * 2001/9/14 - Mark Huth <mhuth at mvista dot com>
 143 *     - Change MII_LINK_READY to not check for end of auto-negotiation,
 144 *       but only for an up link.
 145 *
 146 * 2001/9/20 - Chad N. Tindel <ctindel at ieee dot org>
 147 *     - Add the device field to bonding_t.  Previously the net_device 
 148 *       corresponding to a bond wasn't available from the bonding_t 
 149 *       structure.
 150 *
 151 * 2001/9/25 - Janice Girouard <girouard at us.ibm.com>
 152 *     - add arp_monitor for active backup mode
 153 *
 154 * 2001/10/23 - Takao Indoh <indou dot takao at jp dot fujitsu dot com>
 155 *     - Various memory leak fixes
 156 *
 157 * 2001/11/5 - Mark Huth <mark dot huth at mvista dot com>
 158 *     - Don't take rtnl lock in bond_mii_monitor as it deadlocks under 
 159 *       certain hotswap conditions.  
 160 *       Note:  this same change may be required in bond_arp_monitor ???
 161 *     - Remove possibility of calling bond_sethwaddr with NULL slave_dev ptr 
 162 *     - Handle hot swap ethernet interface deregistration events to remove
 163 *       kernel oops following hot swap of enslaved interface
 164 *
 165 * 2002/1/2 - Chad N. Tindel <ctindel at ieee dot org>
 166 *     - Restore original slave flags at release time.
 167 *
 168 * 2002/02/18 - Erik Habbinga <erik_habbinga at hp dot com>
 169 *     - bond_release(): calling kfree on our_slave after call to
 170 *       bond_restore_slave_flags, not before
 171 *     - bond_enslave(): saving slave flags into original_flags before
 172 *       call to netdev_set_master, so the IFF_SLAVE flag doesn't end
 173 *       up in original_flags
 174 *
 175 * 2002/04/05 - Mark Smith <mark.smith at comdev dot cc> and
 176 *              Steve Mead <steve.mead at comdev dot cc>
 177 *     - Port Gleb Natapov's multicast support patchs from 2.4.12
 178 *       to 2.4.18 adding support for multicast.
 179 *
 180 * 2002/06/17 - Tony Cureington <tony.cureington * hp_com>
 181 *     - corrected uninitialized pointer (ifr.ifr_data) in bond_check_dev_link;
 182 *       actually changed function to use ETHTOOL, then MIIPHY, and finally
 183 *       MIIREG to determine the link status
 184 *     - fixed bad ifr_data pointer assignments in bond_ioctl
 185 *     - corrected mode 1 being reported as active-backup in bond_get_info;
 186 *       also added text to distinguish type of load balancing (rr or xor)
 187 *     - change arp_ip_target module param from "1-12s" (array of 12 ptrs)
 188 *       to "s" (a single ptr)
 189 */
 190
 191#include <linux/config.h>
 192#include <linux/kernel.h>
 193#include <linux/module.h>
 194#include <linux/sched.h>
 195#include <linux/types.h>
 196#include <linux/fcntl.h>
 197#include <linux/interrupt.h>
 198#include <linux/ioport.h>
 199#include <linux/in.h>
 200#include <linux/slab.h>
 201#include <linux/string.h>
 202#include <linux/init.h>
 203#include <linux/timer.h>
 204#include <linux/socket.h>
 205#include <asm/system.h>
 206#include <asm/bitops.h>
 207#include <asm/io.h>
 208#include <asm/dma.h>
 209#include <asm/uaccess.h>
 210#include <linux/errno.h>
 211
 212#include <linux/netdevice.h>
 213#include <linux/etherdevice.h>
 214#include <linux/skbuff.h>
 215#include <net/sock.h>
 216#include <linux/rtnetlink.h>
 217
 218#include <linux/if_bonding.h>
 219#include <linux/smp.h>
 220#include <linux/if_ether.h>
 221#include <linux/if_arp.h>
 222#include <linux/mii.h>
 223#include <linux/ethtool.h>
 224
 225
 226/* monitor all links that often (in milliseconds). <=0 disables monitoring */
 227#ifndef BOND_LINK_MON_INTERV
 228#define BOND_LINK_MON_INTERV    0
 229#endif
 230
 231#undef  MII_LINK_UP
 232#define MII_LINK_UP     0x04
 233
 234#undef  MII_ENDOF_NWAY
 235#define MII_ENDOF_NWAY  0x20
 236
 237#undef  MII_LINK_READY
 238#define MII_LINK_READY  (MII_LINK_UP)
 239
 240#ifndef BOND_LINK_ARP_INTERV
 241#define BOND_LINK_ARP_INTERV    0
 242#endif
 243
 244static int arp_interval = BOND_LINK_ARP_INTERV;
 245static char *arp_ip_target = NULL;
 246static unsigned long arp_target = 0;
 247static u32 my_ip = 0;
 248char *arp_target_hw_addr = NULL;
 249
 250static int max_bonds    = BOND_DEFAULT_MAX_BONDS;
 251static int miimon       = BOND_LINK_MON_INTERV;
 252static int mode         = BOND_MODE_ROUNDROBIN;
 253static int updelay      = 0;
 254static int downdelay    = 0;
 255
 256static int first_pass   = 1;
 257int bond_cnt;
 258static struct bonding *these_bonds =  NULL;
 259static struct net_device *dev_bonds = NULL;
 260
 261MODULE_PARM(max_bonds, "i");
 262MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
 263MODULE_PARM(miimon, "i");
 264MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
 265MODULE_PARM(mode, "i");
 266MODULE_PARM(arp_interval, "i");
 267MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 268MODULE_PARM(arp_ip_target, "s");
 269MODULE_PARM_DESC(arp_ip_target, "arp target in n.n.n.n form");
 270MODULE_PARM_DESC(mode, "Mode of operation : 0 for round robin, 1 for active-backup, 2 for xor");
 271MODULE_PARM(updelay, "i");
 272MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
 273MODULE_PARM(downdelay, "i");
 274MODULE_PARM_DESC(downdelay, "Delay before considering link down, in milliseconds");
 275
 276extern void arp_send( int type, int ptype, u32 dest_ip, struct net_device *dev,
 277        u32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, 
 278        unsigned char *target_hw);
 279
 280static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev);
 281static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev);
 282static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev);
 283static struct net_device_stats *bond_get_stats(struct net_device *dev);
 284static void bond_mii_monitor(struct net_device *dev);
 285static void bond_arp_monitor(struct net_device *dev);
 286static int bond_event(struct notifier_block *this, unsigned long event, void *ptr);
 287static void bond_restore_slave_flags(slave_t *slave);
 288static void bond_mc_list_destroy(struct bonding *bond);
 289static void bond_mc_add(bonding_t *bond, void *addr, int alen);
 290static void bond_mc_delete(bonding_t *bond, void *addr, int alen);
 291static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst, int gpf_flag);
 292static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2);
 293static void bond_set_promiscuity(bonding_t *bond, int inc);
 294static void bond_set_allmulti(bonding_t *bond, int inc);
 295static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi, struct dev_mc_list *mc_list);
 296static void bond_set_slave_inactive_flags(slave_t *slave);
 297static void bond_set_slave_active_flags(slave_t *slave);
 298static int bond_enslave(struct net_device *master, struct net_device *slave);
 299static int bond_release(struct net_device *master, struct net_device *slave);
 300static int bond_release_all(struct net_device *master);
 301static int bond_sethwaddr(struct net_device *master, struct net_device *slave);
 302
 303/*
 304 * bond_get_info is the interface into the /proc filesystem.  This is
 305 * a different interface than the BOND_INFO_QUERY ioctl.  That is done
 306 * through the generic networking ioctl interface, and bond_info_query
 307 * is the internal function which provides that information.
 308 */
 309static int bond_get_info(char *buf, char **start, off_t offset, int length);
 310
 311/* #define BONDING_DEBUG 1 */
 312
 313/* several macros */
 314
 315#define IS_UP(dev)      ((((dev)->flags & (IFF_UP)) == (IFF_UP)) && \
 316                        (netif_running(dev) && netif_carrier_ok(dev)))
 317
 318static void bond_restore_slave_flags(slave_t *slave)
 319{
 320        slave->dev->flags = slave->original_flags;
 321}
 322
 323static void bond_set_slave_inactive_flags(slave_t *slave)
 324{
 325        slave->state = BOND_STATE_BACKUP;
 326        slave->dev->flags |= IFF_NOARP;
 327}
 328
 329static void bond_set_slave_active_flags(slave_t *slave)
 330{
 331        slave->state = BOND_STATE_ACTIVE;
 332        slave->dev->flags &= ~IFF_NOARP;
 333}
 334
 335/* 
 336 * This function detaches the slave <slave> from the list <bond>.
 337 * WARNING: no check is made to verify if the slave effectively
 338 * belongs to <bond>. It returns <slave> in case it's needed.
 339 * Nothing is freed on return, structures are just unchained.
 340 * If the bond->current_slave pointer was pointing to <slave>,
 341 * it's replaced with slave->next, or <bond> if not applicable.
 342 */
 343static slave_t *bond_detach_slave(bonding_t *bond, slave_t *slave)
 344{
 345        if ((bond == NULL) || (slave == NULL) ||
 346           ((void *)bond == (void *)slave)) {
 347                printk(KERN_ERR
 348                        "bond_detach_slave(): trying to detach "
 349                        "slave %p from bond %p\n", bond, slave);
 350                return slave;
 351        }
 352
 353        if (bond->next == slave) {  /* is the slave at the head ? */
 354                if (bond->prev == slave) {  /* is the slave alone ? */
 355                        write_lock(&bond->ptrlock);
 356                        bond->current_slave = NULL; /* no slave anymore */
 357                        write_unlock(&bond->ptrlock);
 358                        bond->prev = bond->next = (slave_t *)bond;
 359                } else { /* not alone */
 360                        bond->next        = slave->next;
 361                        slave->next->prev = (slave_t *)bond;
 362                        bond->prev->next  = slave->next;
 363
 364                        write_lock(&bond->ptrlock);
 365                        if (bond->current_slave == slave) {
 366                                bond->current_slave = slave->next;
 367                        }
 368                        write_unlock(&bond->ptrlock);
 369                }
 370        }
 371        else {
 372                slave->prev->next = slave->next;
 373                if (bond->prev == slave) {  /* is this slave the last one ? */
 374                        bond->prev = slave->prev;
 375                } else {
 376                        slave->next->prev = slave->prev;
 377                }
 378
 379                write_lock(&bond->ptrlock);
 380                if (bond->current_slave == slave) {
 381                        bond->current_slave = slave->next;
 382                }
 383                write_unlock(&bond->ptrlock);
 384        }
 385
 386        return slave;
 387}
 388
 389/* 
 390 * if <dev> supports MII link status reporting, check its link
 391 * and report it as a bit field in a short int :
 392 *   - 0x04 means link is up,
 393 *   - 0x20 means end of autonegociation
 394 * If the device doesn't support MII, then we only report 0x24,
 395 * meaning that the link is up and running since we can't check it.
 396 */
 397static u16 bond_check_dev_link(struct net_device *dev)
 398{
 399        static int (* ioctl)(struct net_device *, struct ifreq *, int);
 400        struct ifreq ifr;
 401        struct mii_ioctl_data *mii;
 402        struct ethtool_value etool;
 403
 404        if ((ioctl = dev->do_ioctl) != NULL)  { /* ioctl to access MII */
 405                /* TODO: set pointer to correct ioctl on a per team member */
 406                /*       bases to make this more efficient. that is, once  */
 407                /*       we determine the correct ioctl, we will always    */
 408                /*       call it and not the others for that team          */
 409                /*       member.                                           */
 410
 411                /* try SOICETHTOOL ioctl, some drivers cache ETHTOOL_GLINK */
 412                /* for a period of time; we need to encourage link status  */
 413                /* be reported by network drivers in real time; if the     */
 414                /* value is cached, the mmimon module parm may have no     */
 415                /* effect...                                               */
 416                etool.cmd = ETHTOOL_GLINK;
 417                ifr.ifr_data = (char*)&etool;
 418                if (ioctl(dev, &ifr, SIOCETHTOOL) == 0) {
 419                        if (etool.data == 1) {
 420                                return(MII_LINK_READY);
 421                        } 
 422                        else { 
 423                                return(0);
 424                        } 
 425                }
 426
 427                /*
 428                 * We cannot assume that SIOCGMIIPHY will also read a
 429                 * register; not all network drivers support that.
 430                 */
 431
 432                /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
 433                mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 434                if (ioctl(dev, &ifr, SIOCGMIIPHY) != 0) {
 435                        return MII_LINK_READY;   /* can't tell */
 436                }
 437
 438                mii->reg_num = 1;
 439                if (ioctl(dev, &ifr, SIOCGMIIREG) == 0) {
 440                        /*
 441                         * mii->val_out contains MII reg 1, BMSR
 442                         * 0x0004 means link established
 443                         */
 444                        return mii->val_out;
 445                }
 446
 447        }
 448        return MII_LINK_READY;  /* spoof link up ( we can't check it) */
 449}
 450
 451static u16 bond_check_mii_link(bonding_t *bond)
 452{
 453        int has_active_interface = 0;
 454        unsigned long flags;
 455
 456        read_lock_irqsave(&bond->lock, flags);
 457        read_lock(&bond->ptrlock);
 458        has_active_interface = (bond->current_slave != NULL);
 459        read_unlock(&bond->ptrlock);
 460        read_unlock_irqrestore(&bond->lock, flags);
 461
 462        return (has_active_interface ? MII_LINK_READY : 0);
 463}
 464
 465static int bond_open(struct net_device *dev)
 466{
 467        struct timer_list *timer = &((struct bonding *)(dev->priv))->mii_timer;
 468        struct timer_list *arp_timer = &((struct bonding *)(dev->priv))->arp_timer;
 469        MOD_INC_USE_COUNT;
 470
 471        if (miimon > 0) {  /* link check interval, in milliseconds. */
 472                init_timer(timer);
 473                timer->expires  = jiffies + (miimon * HZ / 1000);
 474                timer->data     = (unsigned long)dev;
 475                timer->function = (void *)&bond_mii_monitor;
 476                add_timer(timer);
 477        }
 478
 479        if (arp_interval> 0) {  /* arp interval, in milliseconds. */
 480                init_timer(arp_timer);
 481                arp_timer->expires  = jiffies + (arp_interval * HZ / 1000);
 482                arp_timer->data     = (unsigned long)dev;
 483                arp_timer->function = (void *)&bond_arp_monitor;
 484                add_timer(arp_timer);
 485        }
 486        return 0;
 487}
 488
 489static int bond_close(struct net_device *master)
 490{
 491        bonding_t *bond = (struct bonding *) master->priv;
 492        unsigned long flags;
 493
 494        write_lock_irqsave(&bond->lock, flags);
 495
 496        if (miimon > 0) {  /* link check interval, in milliseconds. */
 497                del_timer(&bond->mii_timer);
 498        }
 499        if (arp_interval> 0) {  /* arp interval, in milliseconds. */
 500                del_timer(&bond->arp_timer);
 501        }
 502
 503        /* Release the bonded slaves */
 504        bond_release_all(master);
 505        bond_mc_list_destroy (bond);
 506
 507        write_unlock_irqrestore(&bond->lock, flags);
 508
 509        MOD_DEC_USE_COUNT;
 510        return 0;
 511}
 512
 513/* 
 514 * flush all members of flush->mc_list from device dev->mc_list
 515 */
 516static void bond_mc_list_flush(struct net_device *dev, struct net_device *flush)
 517{ 
 518        struct dev_mc_list *dmi; 
 519 
 520        for (dmi = flush->mc_list; dmi != NULL; dmi = dmi->next) 
 521                dev_mc_delete(dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 522}
 523
 524/*
 525 * Totally destroys the mc_list in bond
 526 */
 527static void bond_mc_list_destroy(struct bonding *bond)
 528{
 529        struct dev_mc_list *dmi;
 530
 531        dmi = bond->mc_list; 
 532        while (dmi) { 
 533                bond->mc_list = dmi->next; 
 534                kfree(dmi); 
 535                dmi = bond->mc_list; 
 536        }
 537}
 538
 539/*
 540 * Add a Multicast address to every slave in the bonding group
 541 */
 542static void bond_mc_add(bonding_t *bond, void *addr, int alen)
 543{ 
 544        slave_t *slave;
 545
 546        for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) {
 547                dev_mc_add(slave->dev, addr, alen, 0);
 548        }
 549} 
 550
 551/*
 552 * Remove a multicast address from every slave in the bonding group
 553 */
 554static void bond_mc_delete(bonding_t *bond, void *addr, int alen)
 555{ 
 556        slave_t *slave; 
 557
 558        for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev)
 559                dev_mc_delete(slave->dev, addr, alen, 0);
 560} 
 561
 562/*
 563 * Copy all the Multicast addresses from src to the bonding device dst
 564 */
 565static int bond_mc_list_copy (struct dev_mc_list *src, struct bonding *dst,
 566 int gpf_flag)
 567{
 568        struct dev_mc_list *dmi, *new_dmi;
 569
 570        for (dmi = src; dmi != NULL; dmi = dmi->next) { 
 571                new_dmi = kmalloc(sizeof(struct dev_mc_list), gpf_flag);
 572
 573                if (new_dmi == NULL) {
 574                        return -ENOMEM; 
 575                }
 576
 577                new_dmi->next = dst->mc_list; 
 578                dst->mc_list = new_dmi;
 579
 580                new_dmi->dmi_addrlen = dmi->dmi_addrlen; 
 581                memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen); 
 582                new_dmi->dmi_users = dmi->dmi_users;
 583                new_dmi->dmi_gusers = dmi->dmi_gusers; 
 584        } 
 585        return 0;
 586}
 587
 588/*
 589 * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
 590 */
 591static inline int dmi_same(struct dev_mc_list *dmi1, struct dev_mc_list *dmi2)
 592{ 
 593        return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
 594         dmi1->dmi_addrlen == dmi2->dmi_addrlen;
 595} 
 596
 597/*
 598 * Push the promiscuity flag down to all slaves
 599 */
 600static void bond_set_promiscuity(bonding_t *bond, int inc)
 601{ 
 602        slave_t *slave; 
 603 
 604        for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev)
 605                dev_set_promiscuity(slave->dev, inc);
 606} 
 607
 608/*
 609 * Push the allmulti flag down to all slaves
 610 */
 611static void bond_set_allmulti(bonding_t *bond, int inc)
 612{ 
 613        slave_t *slave; 
 614 
 615        for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev)
 616                dev_set_allmulti(slave->dev, inc);
 617} 
 618
 619/* 
 620 * returns dmi entry if found, NULL otherwise 
 621 */
 622static struct dev_mc_list* bond_mc_list_find_dmi(struct dev_mc_list *dmi,
 623 struct dev_mc_list *mc_list)
 624{ 
 625        struct dev_mc_list *idmi;
 626
 627        for (idmi = mc_list; idmi != NULL; idmi = idmi->next) {
 628                if (dmi_same(dmi, idmi)) {
 629                        return idmi; 
 630                }
 631        }
 632        return NULL;
 633} 
 634
 635static void set_multicast_list(struct net_device *master)
 636{
 637        bonding_t *bond = master->priv;
 638        struct dev_mc_list *dmi;
 639        unsigned long flags = 0;
 640
 641        /*
 642         * Lock the private data for the master
 643         */
 644        write_lock_irqsave(&bond->lock, flags);
 645
 646        /* set promiscuity flag to slaves */
 647        if ( (master->flags & IFF_PROMISC) && !(bond->flags & IFF_PROMISC) )
 648                bond_set_promiscuity(bond, 1); 
 649
 650        if ( !(master->flags & IFF_PROMISC) && (bond->flags & IFF_PROMISC) ) 
 651                bond_set_promiscuity(bond, -1); 
 652
 653        /* set allmulti flag to slaves */ 
 654        if ( (master->flags & IFF_ALLMULTI) && !(bond->flags & IFF_ALLMULTI) ) 
 655                bond_set_allmulti(bond, 1); 
 656
 657        if ( !(master->flags & IFF_ALLMULTI) && (bond->flags & IFF_ALLMULTI) )
 658                bond_set_allmulti(bond, -1); 
 659
 660        bond->flags = master->flags; 
 661
 662        /* looking for addresses to add to slaves' mc list */ 
 663        for (dmi = master->mc_list; dmi != NULL; dmi = dmi->next) { 
 664                if (bond_mc_list_find_dmi(dmi, bond->mc_list) == NULL) 
 665                 bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen); 
 666        } 
 667
 668        /* looking for addresses to delete from slaves' list */ 
 669        for (dmi = bond->mc_list; dmi != NULL; dmi = dmi->next) { 
 670                if (bond_mc_list_find_dmi(dmi, master->mc_list) == NULL) 
 671                 bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen); 
 672        }
 673
 674
 675        /* save master's multicast list */ 
 676        bond_mc_list_destroy (bond);
 677        bond_mc_list_copy (master->mc_list, bond, GFP_KERNEL);
 678
 679        write_unlock_irqrestore(&bond->lock, flags);
 680}
 681
 682/*
 683 * This function counts the number of attached 
 684 * slaves for use by bond_xmit_xor.
 685 */
 686static void update_slave_cnt(bonding_t *bond)
 687{
 688        slave_t *slave = NULL;
 689
 690        bond->slave_cnt = 0;
 691        for (slave = bond->prev; slave != (slave_t*)bond; slave = slave->prev) {
 692                bond->slave_cnt++;
 693        }
 694}
 695
 696/* enslave device <slave> to bond device <master> */
 697static int bond_enslave(struct net_device *master_dev, 
 698                        struct net_device *slave_dev)
 699{
 700        bonding_t *bond = NULL;
 701        slave_t *new_slave = NULL;
 702        unsigned long flags = 0;
 703        int ndx = 0;
 704        int err = 0;
 705        struct dev_mc_list *dmi;
 706
 707        if (master_dev == NULL || slave_dev == NULL) {
 708                return -ENODEV;
 709        }
 710        bond = (struct bonding *) master_dev->priv;
 711
 712        if (slave_dev->do_ioctl == NULL) {
 713                printk(KERN_DEBUG
 714                        "Warning : no link monitoring support for %s\n",
 715                        slave_dev->name);
 716        }
 717        write_lock_irqsave(&bond->lock, flags);
 718
 719        /* not running. */
 720        if ((slave_dev->flags & IFF_UP) != IFF_UP) {
 721#ifdef BONDING_DEBUG
 722                printk(KERN_CRIT "Error, slave_dev is not running\n");
 723#endif
 724                write_unlock_irqrestore(&bond->lock, flags);
 725                return -EINVAL;
 726        }
 727
 728        /* already enslaved */
 729        if (master_dev->flags & IFF_SLAVE || slave_dev->flags & IFF_SLAVE) {
 730#ifdef BONDING_DEBUG
 731                printk(KERN_CRIT "Error, Device was already enslaved\n");
 732#endif
 733                write_unlock_irqrestore(&bond->lock, flags);
 734                return -EBUSY;
 735        }
 736                   
 737        if ((new_slave = kmalloc(sizeof(slave_t), GFP_KERNEL)) == NULL) {
 738                write_unlock_irqrestore(&bond->lock, flags);
 739                return -ENOMEM;
 740        }
 741        memset(new_slave, 0, sizeof(slave_t));
 742
 743        /* save flags before call to netdev_set_master */
 744        new_slave->original_flags = slave_dev->flags;
 745        err = netdev_set_master(slave_dev, master_dev);
 746
 747        if (err) {
 748#ifdef BONDING_DEBUG
 749                printk(KERN_CRIT "Error %d calling netdev_set_master\n", err);
 750#endif
 751                kfree(new_slave);
 752                write_unlock_irqrestore(&bond->lock, flags);
 753                return err;      
 754        }
 755
 756        new_slave->dev = slave_dev;
 757
 758        /* set promiscuity level to new slave */ 
 759        if (master_dev->flags & IFF_PROMISC)
 760                dev_set_promiscuity(slave_dev, 1); 
 761 
 762        /* set allmulti level to new slave */
 763        if (master_dev->flags & IFF_ALLMULTI) 
 764                dev_set_allmulti(slave_dev, 1); 
 765 
 766        /* upload master's mc_list to new slave */ 
 767        for (dmi = master_dev->mc_list; dmi != NULL; dmi = dmi->next) 
 768                dev_mc_add (slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
 769
 770        /* 
 771         * queue to the end of the slaves list, make the first element its
 772         * successor, the last one its predecessor, and make it the bond's
 773         * predecessor. 
 774         *
 775         * Just to clarify, so future bonding driver hackers don't go through
 776         * the same confusion stage I did trying to figure this out, the
 777         * slaves are stored in a double linked circular list, sortof.
 778         * In the ->next direction, the last slave points to the first slave,
 779         * bypassing bond; only the slaves are in the ->next direction.
 780         * In the ->prev direction, however, the first slave points to bond
 781         * and bond points to the last slave.
 782         *
 783         * It looks like a circle with a little bubble hanging off one side
 784         * in the ->prev direction only.
 785         *
 786         * When going through the list once, its best to start at bond->prev
 787         * and go in the ->prev direction, testing for bond.  Doing this
 788         * in the ->next direction doesn't work.  Trust me, I know this now.
 789         * :)  -mts 2002.03.14
 790         */
 791        new_slave->prev       = bond->prev;
 792        new_slave->prev->next = new_slave;
 793        bond->prev            = new_slave;
 794        new_slave->next       = bond->next;
 795
 796        new_slave->delay = 0;
 797        new_slave->link_failure_count = 0;
 798
 799        /* check for initial state */
 800        if ((miimon <= 0) || ((bond_check_dev_link(slave_dev) & MII_LINK_READY)
 801                 == MII_LINK_READY)) {
 802#ifdef BONDING_DEBUG
 803                printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_UP\n");
 804#endif
 805                new_slave->link  = BOND_LINK_UP;
 806        }
 807        else {
 808#ifdef BONDING_DEBUG
 809                printk(KERN_CRIT "Initial state of slave_dev is BOND_LINK_DOWN\n");
 810#endif
 811                new_slave->link  = BOND_LINK_DOWN;
 812        }
 813
 814        /* if we're in active-backup mode, we need one and only one active
 815         * interface. The backup interfaces will have their NOARP flag set
 816         * because we need them to be completely deaf and not to respond to
 817         * any ARP request on the network to avoid fooling a switch. Thus,
 818         * since we guarantee that current_slave always point to the last
 819         * usable interface, we just have to verify this interface's flag.
 820         */
 821        if (mode == BOND_MODE_ACTIVEBACKUP) {
 822                if (((bond->current_slave == NULL)
 823                        || (bond->current_slave->dev->flags & IFF_NOARP))
 824                        && (new_slave->link == BOND_LINK_UP)) {
 825#ifdef BONDING_DEBUG
 826                        printk(KERN_CRIT "This is the first active slave\n");
 827#endif
 828                        /* first slave or no active slave yet, and this link
 829                           is OK, so make this interface the active one */
 830                        bond->current_slave = new_slave;
 831                        bond_set_slave_active_flags(new_slave);
 832                }
 833                else {
 834#ifdef BONDING_DEBUG
 835                        printk(KERN_CRIT "This is just a backup slave\n");
 836#endif
 837                        bond_set_slave_inactive_flags(new_slave);
 838                }
 839        } else {
 840#ifdef BONDING_DEBUG
 841                printk(KERN_CRIT "This slave is always active in trunk mode\n");
 842#endif
 843                /* always active in trunk mode */
 844                new_slave->state = BOND_STATE_ACTIVE;
 845                if (bond->current_slave == NULL) {
 846                        bond->current_slave = new_slave;
 847                }
 848        }
 849
 850        update_slave_cnt(bond);
 851
 852        write_unlock_irqrestore(&bond->lock, flags);
 853
 854        /*
 855         * !!! This is to support old versions of ifenslave.  We can remove
 856         * this in 2.5 because our ifenslave takes care of this for us.
 857         * We check to see if the master has a mac address yet.  If not,
 858         * we'll give it the mac address of our slave device.
 859         */
 860        for (ndx = 0; ndx < slave_dev->addr_len; ndx++) {
 861#ifdef BONDING_DEBUG
 862                printk(KERN_CRIT "Checking ndx=%d of master_dev->dev_addr\n",
 863                       ndx);
 864#endif
 865                if (master_dev->dev_addr[ndx] != 0) {
 866#ifdef BONDING_DEBUG
 867                printk(KERN_CRIT "Found non-zero byte at ndx=%d\n",
 868                       ndx);
 869#endif
 870                        break;
 871                }
 872        }
 873        if (ndx == slave_dev->addr_len) {
 874                /*
 875                 * We got all the way through the address and it was
 876                 * all 0's.
 877                 */
 878#ifdef BONDING_DEBUG
 879                printk(KERN_CRIT "%s doesn't have a MAC address yet.  ",
 880                       master_dev->name);
 881                printk(KERN_CRIT "Going to give assign it from %s.\n",
 882                       slave_dev->name);
 883#endif
 884                bond_sethwaddr(master_dev, slave_dev);
 885        }
 886
 887        printk (KERN_INFO "%s: enslaving %s as a%s interface with a%s link.\n",
 888                master_dev->name, slave_dev->name,
 889                new_slave->state == BOND_STATE_ACTIVE ? "n active" : " backup",
 890                new_slave->link == BOND_LINK_UP ? "n up" : " down");
 891
 892        return 0;
 893}
 894
 895/* 
 896 * This function changes the active slave to slave <slave_dev>.
 897 * It returns -EINVAL in the following cases.
 898 *  - <slave_dev> is not found in the list.
 899 *  - There is not active slave now.
 900 *  - <slave_dev> is already active.
 901 *  - The link state of <slave_dev> is not BOND_LINK_UP.
 902 *  - <slave_dev> is not running.
 903 * In these cases, this fuction does nothing.
 904 * In the other cases, currnt_slave pointer is changed and 0 is returned.
 905 */
 906static int bond_change_active(struct net_device *master_dev, struct net_device *slave_dev)
 907{
 908        bonding_t *bond;
 909        slave_t *slave;
 910        slave_t *oldactive = NULL;
 911        slave_t *newactive = NULL;
 912        unsigned long flags;
 913        int ret = 0;
 914
 915        if (master_dev == NULL || slave_dev == NULL) {
 916                return -ENODEV;
 917        }
 918
 919        bond = (struct bonding *) master_dev->priv;
 920        write_lock_irqsave(&bond->lock, flags);
 921        slave = (slave_t *)bond;
 922        oldactive = bond->current_slave;
 923
 924        while ((slave = slave->prev) != (slave_t *)bond) {
 925                if(slave_dev == slave->dev) {
 926                        newactive = slave;
 927                        break;
 928                }
 929        }
 930
 931        if ((newactive != NULL)&&
 932            (oldactive != NULL)&&
 933            (newactive != oldactive)&&
 934            (newactive->link == BOND_LINK_UP)&&
 935            IS_UP(newactive->dev)) {
 936                bond_set_slave_inactive_flags(oldactive);
 937                bond_set_slave_active_flags(newactive);
 938                bond->current_slave = newactive;
 939                printk("%s : activate %s(old : %s)\n",
 940                        master_dev->name, newactive->dev->name, 
 941                        oldactive->dev->name);
 942        }
 943        else {
 944                ret = -EINVAL;
 945        }
 946        write_unlock_irqrestore(&bond->lock, flags);
 947        return ret;
 948}
 949
 950/* Choose a new valid interface from the pool, set it active
 951 * and make it the current slave. If no valid interface is
 952 * found, the oldest slave in BACK state is choosen and
 953 * activated. If none is found, it's considered as no
 954 * interfaces left so the current slave is set to NULL.
 955 * The result is a pointer to the current slave.
 956 *
 957 * Since this function sends messages tails through printk, the caller
 958 * must have started something like `printk(KERN_INFO "xxxx ");'.
 959 *
 960 * Warning: must put locks around the call to this function if needed.
 961 */
 962slave_t *change_active_interface(bonding_t *bond)
 963{
 964        slave_t *newslave, *oldslave;
 965        slave_t *bestslave = NULL;
 966        int mintime;
 967
 968        read_lock(&bond->ptrlock);
 969        newslave = oldslave = bond->current_slave;
 970        read_unlock(&bond->ptrlock);
 971
 972        if (newslave == NULL) { /* there were no active slaves left */
 973                if (bond->next != (slave_t *)bond) {  /* found one slave */
 974                        write_lock(&bond->ptrlock);
 975                        newslave = bond->current_slave = bond->next;
 976                        write_unlock(&bond->ptrlock);
 977                } else {
 978                        printk (" but could not find any %s interface.\n",
 979                                (mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
 980                        write_lock(&bond->ptrlock);
 981                        bond->current_slave = (slave_t *)NULL;
 982                        write_unlock(&bond->ptrlock);
 983                        return NULL; /* still no slave, return NULL */
 984                }
 985        }
 986
 987        mintime = updelay;
 988
 989        do {
 990                if (IS_UP(newslave->dev)) {
 991                        if (newslave->link == BOND_LINK_UP) {
 992                                /* this one is immediately usable */
 993                                if (mode == BOND_MODE_ACTIVEBACKUP) {
 994                                        bond_set_slave_active_flags(newslave);
 995                                        printk (" and making interface %s the active one.\n",
 996                                                newslave->dev->name);
 997                                }
 998                                else {
 999                                        printk (" and setting pointer to interface %s.\n",
1000                                                newslave->dev->name);
1001                                }
1002
1003                                write_lock(&bond->ptrlock);
1004                                bond->current_slave = newslave;
1005                                write_unlock(&bond->ptrlock);
1006                                return newslave;
1007                        }
1008                        else if (newslave->link == BOND_LINK_BACK) {
1009                                /* link up, but waiting for stabilization */
1010                                if (newslave->delay < mintime) {
1011                                        mintime = newslave->delay;
1012                                        bestslave = newslave;
1013                                }
1014                        }
1015                }
1016        } while ((newslave = newslave->next) != oldslave);
1017
1018        /* no usable backup found, we'll see if we at least got a link that was
1019           coming back for a long time, and could possibly already be usable.
1020        */
1021
1022        if (bestslave != NULL) {
1023                /* early take-over. */
1024                printk (" and making interface %s the active one %d ms earlier.\n",
1025                        bestslave->dev->name,
1026                        (updelay - bestslave->delay)*miimon);
1027
1028                bestslave->delay = 0;
1029                bestslave->link = BOND_LINK_UP;
1030                bond_set_slave_active_flags(bestslave);
1031
1032                write_lock(&bond->ptrlock);
1033                bond->current_slave = bestslave;
1034                write_unlock(&bond->ptrlock);
1035                return bestslave;
1036        }
1037
1038        printk (" but could not find any %s interface.\n",
1039                (mode == BOND_MODE_ACTIVEBACKUP) ? "backup":"other");
1040        
1041        /* absolutely nothing found. let's return NULL */
1042        write_lock(&bond->ptrlock);
1043        bond->current_slave = (slave_t *)NULL;
1044        write_unlock(&bond->ptrlock);
1045        return NULL;
1046}
1047
1048/*
1049 * Try to release the slave device <slave> from the bond device <master>
1050 * It is legal to access current_slave without a lock because all the function
1051 * is write-locked.
1052 *
1053 * The rules for slave state should be:
1054 *   for Active/Backup:
1055 *     Active stays on all backups go down
1056 *   for Bonded connections:
1057 *     The first up interface should be left on and all others downed.
1058 */
1059static int bond_release(struct net_device *master, struct net_device *slave)
1060{
1061        bonding_t *bond;
1062        slave_t *our_slave, *old_current;
1063        unsigned long flags;
1064        
1065        if (master == NULL || slave == NULL)  {
1066                return -ENODEV;
1067        }
1068
1069        bond = (struct bonding *) master->priv;
1070
1071        write_lock_irqsave(&bond->lock, flags);
1072
1073        /* master already enslaved, or slave not enslaved,
1074           or no slave for this master */
1075        if ((master->flags & IFF_SLAVE) || !(slave->flags & IFF_SLAVE)) {
1076                printk (KERN_DEBUG "%s: cannot release %s.\n", master->name, slave->name);
1077                write_unlock_irqrestore(&bond->lock, flags);
1078                return -EINVAL;
1079        }
1080
1081        our_slave = (slave_t *)bond;
1082        old_current = bond->current_slave;
1083        while ((our_slave = our_slave->prev) != (slave_t *)bond) {
1084                if (our_slave->dev == slave) {
1085                        bond_detach_slave(bond, our_slave);
1086
1087                        printk (KERN_INFO "%s: releasing %s interface %s",
1088                                master->name,
1089                                (our_slave->state == BOND_STATE_ACTIVE) ? "active" : "backup",
1090                                slave->name);
1091
1092                        if (our_slave == old_current) {
1093                                /* find a new interface and be verbose */
1094                                change_active_interface(bond); 
1095                        } else {
1096                                printk(".\n");
1097                        }
1098
1099                        /* release the slave from its bond */
1100
1101                        /* flush master's mc_list from slave */ 
1102                        bond_mc_list_flush (slave, master); 
1103       
1104                        /* unset promiscuity level from slave */
1105                        if (master->flags & IFF_PROMISC) 
1106                                dev_set_promiscuity(slave, -1); 
1107       
1108                        /* unset allmulti level from slave */ 
1109                        if (master->flags & IFF_ALLMULTI)
1110                                dev_set_allmulti(slave, -1); 
1111
1112                        netdev_set_master(slave, NULL);
1113
1114                        /* only restore its RUNNING flag if monitoring set it down */
1115                        if (slave->flags & IFF_UP) {
1116                                slave->flags |= IFF_RUNNING;
1117                        }
1118
1119                        if (slave->flags & IFF_NOARP || 
1120                                bond->current_slave != NULL) {
1121                                        dev_close(slave);
1122                        }
1123
1124                        bond_restore_slave_flags(our_slave);
1125                        kfree(our_slave);
1126
1127                        if (bond->current_slave == NULL) {
1128                                printk(KERN_INFO
1129                                        "%s: now running without any active interface !\n",
1130                                        master->name);
1131                        }
1132
1133                        update_slave_cnt(bond);
1134
1135                        write_unlock_irqrestore(&bond->lock, flags);
1136                        return 0;  /* deletion OK */
1137                }
1138        }
1139
1140        /* if we get here, it's because the device was not found */
1141        write_unlock_irqrestore(&bond->lock, flags);
1142
1143        printk (KERN_INFO "%s: %s not enslaved\n", master->name, slave->name);
1144        return -EINVAL;
1145}
1146
1147/* 
1148 * This function releases all slaves.
1149 * Warning: must put write-locks around the call to this function.
1150 */
1151static int bond_release_all(struct net_device *master)
1152{
1153        bonding_t *bond;
1154        slave_t *our_slave;
1155        struct net_device *slave_dev;
1156
1157        if (master == NULL)  {
1158                return -ENODEV;
1159        }
1160
1161        if (master->flags & IFF_SLAVE) {
1162                return -EINVAL;
1163        }
1164
1165        bond = (struct bonding *) master->priv;
1166        bond->current_slave = NULL;
1167
1168        while ((our_slave = bond->prev) != (slave_t *)bond) {
1169                slave_dev = our_slave->dev;
1170                bond->prev = our_slave->prev;
1171
1172                kfree(our_slave);
1173
1174                netdev_set_master(slave_dev, NULL);
1175
1176                /* only restore its RUNNING flag if monitoring set it down */
1177                if (slave_dev->flags & IFF_UP)
1178                        slave_dev->flags |= IFF_RUNNING;
1179
1180                if (slave_dev->flags & IFF_NOARP)
1181                        dev_close(slave_dev);
1182        }
1183        bond->next = (slave_t *)bond;
1184        bond->slave_cnt = 0;
1185        printk (KERN_INFO "%s: releases all slaves\n", master->name);
1186
1187        return 0;
1188}
1189
1190/* this function is called regularly to monitor each slave's link. */
1191static void bond_mii_monitor(struct net_device *master)
1192{
1193        bonding_t *bond = (struct bonding *) master->priv;
1194        slave_t *slave, *bestslave, *oldcurrent;
1195        unsigned long flags;
1196        int slave_died = 0;
1197
1198        read_lock_irqsave(&bond->lock, flags);
1199
1200        /* we will try to read the link status of each of our slaves, and
1201         * set their IFF_RUNNING flag appropriately. For each slave not
1202         * supporting MII status, we won't do anything so that a user-space
1203         * program could monitor the link itself if needed.
1204         */
1205
1206        bestslave = NULL;
1207        slave = (slave_t *)bond;
1208
1209        read_lock(&bond->ptrlock);
1210        oldcurrent = bond->current_slave;
1211        read_unlock(&bond->ptrlock);
1212
1213        while ((slave = slave->prev) != (slave_t *)bond) {
1214                /* use updelay+1 to match an UP slave even when updelay is 0 */
1215                int mindelay = updelay + 1;
1216                struct net_device *dev = slave->dev;
1217                u16 link_state;
1218                
1219                link_state = bond_check_dev_link(dev);
1220
1221                switch (slave->link) {
1222                case BOND_LINK_UP:      /* the link was up */
1223                        if ((link_state & MII_LINK_UP) == MII_LINK_UP) {
1224                                /* link stays up, tell that this one
1225                                   is immediately available */
1226                                if (IS_UP(dev) && (mindelay > -2)) {
1227                                        /* -2 is the best case :
1228                                           this slave was already up */
1229                                        mindelay = -2;
1230                                        bestslave = slave;
1231                                }
1232                                break;
1233                        }
1234                        else { /* link going down */
1235                                slave->link  = BOND_LINK_FAIL;
1236                                slave->delay = downdelay;
1237                                if (slave->link_failure_count < UINT_MAX) {
1238                                        slave->link_failure_count++;
1239                                }
1240                                if (downdelay > 0) {
1241                                        printk (KERN_INFO
1242                                                "%s: link status down for %sinterface "
1243                                                "%s, disabling it in %d ms.\n",
1244                                                master->name,
1245                                                IS_UP(dev)
1246                                                ? ((mode == BOND_MODE_ACTIVEBACKUP)
1247                                                   ? ((slave == oldcurrent)
1248                                                      ? "active " : "backup ")
1249                                                   : "")
1250                                                : "idle ",
1251                                                dev->name,
1252                                                downdelay * miimon);
1253                                        }
1254                        }
1255                        /* no break ! fall through the BOND_LINK_FAIL test to
1256                           ensure proper action to be taken
1257                        */
1258                case BOND_LINK_FAIL:    /* the link has just gone down */
1259                        if ((link_state & MII_LINK_UP) == 0) {
1260                                /* link stays down */
1261                                if (slave->delay <= 0) {
1262                                        /* link down for too long time */
1263                                        slave->link = BOND_LINK_DOWN;
1264                                        /* in active/backup mode, we must
1265                                           completely disable this interface */
1266                                        if (mode == BOND_MODE_ACTIVEBACKUP) {
1267                                                bond_set_slave_inactive_flags(slave);
1268                                        }
1269                                        printk(KERN_INFO
1270                                                "%s: link status definitely down "
1271                                                "for interface %s, disabling it",
1272                                                master->name,
1273                                                dev->name);
1274
1275                                        read_lock(&bond->ptrlock);
1276                                        if (slave == bond->current_slave) {
1277                                                read_unlock(&bond->ptrlock);
1278                                                /* find a new interface and be verbose */
1279                                                change_active_interface(bond);
1280                                        } else {
1281                                                read_unlock(&bond->ptrlock);
1282                                                printk(".\n");
1283                                        }
1284                                        slave_died = 1;
1285                                } else {
1286                                        slave->delay--;
1287                                }
1288                        } else if ((link_state & MII_LINK_READY) == MII_LINK_READY) {
1289                                /* link up again */
1290                                slave->link  = BOND_LINK_UP;
1291                                printk(KERN_INFO
1292                                        "%s: link status up again after %d ms "
1293                                        "for interface %s.\n",
1294                                        master->name,
1295                                        (downdelay - slave->delay) * miimon,
1296                                        dev->name);
1297
1298                                if (IS_UP(dev) && (mindelay > -1)) {
1299                                        /* -1 is a good case : this slave went
1300                                           down only for a short time */
1301                                        mindelay = -1;
1302                                        bestslave = slave;
1303                                }
1304                        }
1305                        break;
1306                case BOND_LINK_DOWN:    /* the link was down */
1307                        if ((link_state & MII_LINK_READY) != MII_LINK_READY) {
1308                                /* the link stays down, nothing more to do */
1309                                break;
1310                        } else {        /* link going up */
1311                                slave->link  = BOND_LINK_BACK;
1312                                slave->delay = updelay;
1313                                
1314                                if (updelay > 0) {
1315                                        /* if updelay == 0, no need to
1316                                           advertise about a 0 ms delay */
1317                                        printk (KERN_INFO
1318                                                "%s: link status up for interface"
1319                                                " %s, enabling it in %d ms.\n",
1320                                                master->name,
1321                                                dev->name,
1322                                                updelay * miimon);
1323                                }
1324                        }
1325                        /* no break ! fall through the BOND_LINK_BACK state in
1326                           case there's something to do.
1327                        */
1328                case BOND_LINK_BACK:    /* the link has just come back */
1329                        if ((link_state & MII_LINK_UP) == 0) {
1330                                /* link down again */
1331                                slave->link  = BOND_LINK_DOWN;
1332                                printk(KERN_INFO
1333                                        "%s: link status down again after %d ms "
1334                                        "for interface %s.\n",
1335                                        master->name,
1336                                        (updelay - slave->delay) * miimon,
1337                                        dev->name);
1338                        }
1339                        else if ((link_state & MII_LINK_READY) == MII_LINK_READY) {
1340                                /* link stays up */
1341                                if (slave->delay == 0) {
1342                                        /* now the link has been up for long time enough */
1343                                        slave->link = BOND_LINK_UP;
1344
1345                                        if (mode == BOND_MODE_ACTIVEBACKUP) {
1346                                                /* prevent it from being the active one */
1347                                                slave->state = BOND_STATE_BACKUP;
1348                                        }
1349                                        else {
1350                                                /* make it immediately active */
1351                                                slave->state = BOND_STATE_ACTIVE;
1352                                        }
1353
1354                                        printk(KERN_INFO
1355                                                "%s: link status definitely up "
1356                                                "for interface %s.\n",
1357                                                master->name,
1358                                                dev->name);
1359                                }
1360                                else
1361                                        slave->delay--;
1362                                
1363                                /* we'll also look for the mostly eligible slave */
1364                                if (IS_UP(dev) && (slave->delay < mindelay)) {
1365                                        mindelay = slave->delay;
1366                                        bestslave = slave;
1367                                } 
1368                        }
1369                        break;
1370                } /* end of switch */
1371        } /* end of while */
1372
1373        /* 
1374         * if there's no active interface and we discovered that one
1375         * of the slaves could be activated earlier, so we do it.
1376         */
1377        read_lock(&bond->ptrlock);
1378        oldcurrent = bond->current_slave;
1379        read_unlock(&bond->ptrlock);
1380
1381        if (oldcurrent == NULL) {  /* no active interface at the moment */
1382                if (bestslave != NULL) { /* last chance to find one ? */
1383                        if (bestslave->link == BOND_LINK_UP) {
1384                                printk (KERN_INFO
1385                                        "%s: making interface %s the new active one.\n",
1386                                        master->name, bestslave->dev->name);
1387                        } else {
1388                                printk (KERN_INFO
1389                                        "%s: making interface %s the new "
1390                                        "active one %d ms earlier.\n",
1391                                        master->name, bestslave->dev->name,
1392                                        (updelay - bestslave->delay) * miimon);
1393
1394                                bestslave->delay = 0;
1395                                bestslave->link  = BOND_LINK_UP;
1396                        }
1397
1398                        if (mode == BOND_MODE_ACTIVEBACKUP) {
1399                                bond_set_slave_active_flags(bestslave);
1400                        } else {
1401                                bestslave->state = BOND_STATE_ACTIVE;
1402                        }
1403                        write_lock(&bond->ptrlock);
1404                        bond->current_slave = bestslave;
1405                        write_unlock(&bond->ptrlock);
1406                } else if (slave_died) {
1407                        /* print this message only once a slave has just died */
1408                        printk(KERN_INFO
1409                                "%s: now running without any active interface !\n",
1410                                master->name);
1411                }
1412        }
1413
1414        read_unlock_irqrestore(&bond->lock, flags);
1415        /* re-arm the timer */
1416        mod_timer(&bond->mii_timer, jiffies + (miimon * HZ / 1000));
1417}
1418
1419/* 
1420 * this function is called regularly to monitor each slave's link 
1421 * insuring that traffic is being sent and received.  If the adapter
1422 * has been dormant, then an arp is transmitted to generate traffic 
1423 */
1424static void bond_arp_monitor(struct net_device *master)
1425{
1426        bonding_t *bond;
1427        unsigned long flags;
1428        slave_t *slave;
1429        int the_delta_in_ticks =  arp_interval * HZ / 1000;
1430        int next_timer = jiffies + (arp_interval * HZ / 1000);
1431
1432        bond = (struct bonding *) master->priv; 
1433        if (master->priv == NULL) {
1434                mod_timer(&bond->arp_timer, next_timer);
1435                return;
1436        }
1437
1438        read_lock_irqsave(&bond->lock, flags);
1439
1440        if (!IS_UP(master)) {
1441                mod_timer(&bond->arp_timer, next_timer);
1442                goto arp_monitor_out;
1443        }
1444
1445
1446        if (rtnl_shlock_nowait()) {
1447                goto arp_monitor_out;
1448        }
1449
1450        if (rtnl_exlock_nowait()) {
1451                rtnl_shunlock();
1452                goto arp_monitor_out;
1453        }
1454
1455        /* see if any of the previous devices are up now (i.e. they have seen a 
1456         * response from an arp request sent by another adapter, since they 
1457         * have the same hardware address).
1458         */
1459
1460        slave = (slave_t *)bond;
1461        while ((slave = slave->prev) != (slave_t *)bond)  {
1462
1463                read_lock(&bond->ptrlock);
1464                if ( (!(slave->link == BOND_LINK_UP))  
1465                                && (slave != bond->current_slave) ) {
1466
1467                        read_unlock(&bond->ptrlock);
1468
1469                        if ( ((jiffies - slave->dev->trans_start) <= 
1470                                                the_delta_in_ticks) &&  
1471                             ((jiffies - slave->dev->last_rx) <= 
1472                                                the_delta_in_ticks) ) {
1473
1474                                slave->link  = BOND_LINK_UP;
1475                                write_lock(&bond->ptrlock);
1476                                if (bond->current_slave == NULL) {
1477                                        slave->state = BOND_STATE_ACTIVE;
1478                                        bond->current_slave = slave;
1479                                }
1480                                if (slave != bond->current_slave) {
1481                                        slave->dev->flags |= IFF_NOARP;
1482                                }
1483                                write_unlock(&bond->ptrlock);
1484                        } else {
1485                                if ((jiffies - slave->dev->last_rx) <= 
1486                                                the_delta_in_ticks)  {
1487                                        arp_send(ARPOP_REQUEST, ETH_P_ARP, 
1488                                                arp_target, slave->dev, 
1489                                                my_ip, arp_target_hw_addr, 
1490                                                slave->dev->dev_addr, 
1491                                                arp_target_hw_addr); 
1492                                }
1493                        }
1494                } else 
1495                        read_unlock(&bond->ptrlock);
1496        }
1497
1498        read_lock(&bond->ptrlock);
1499        slave = bond->current_slave;
1500        read_unlock(&bond->ptrlock);
1501
1502        if (slave != 0) {
1503        
1504          /* see if you need to take down the current_slave, since
1505           * you haven't seen an arp in 2*arp_intervals
1506           */
1507
1508                if ( ((jiffies - slave->dev->trans_start) >= 
1509                      (2*the_delta_in_ticks)) ||
1510                     ((jiffies - slave->dev->last_rx) >= 
1511                      (2*the_delta_in_ticks)) ) {
1512
1513                        if (slave->link == BOND_LINK_UP) {
1514                                slave->link  = BOND_LINK_DOWN;
1515                                slave->state = BOND_STATE_BACKUP;
1516                                /* 
1517                                 * we want to see arps, otherwise we couldn't 
1518                                 * bring the adapter back online...  
1519                                 */
1520                                printk(KERN_INFO "%s: link status definitely "
1521                                                 "down for interface %s, "
1522                                                 "disabling it",
1523                                       slave->dev->master->name,
1524                                       slave->dev->name);
1525                                /* find a new interface and be verbose */
1526                                change_active_interface(bond);
1527                                read_lock(&bond->ptrlock);
1528                                slave = bond->current_slave;
1529                                read_unlock(&bond->ptrlock);
1530                        }
1531                } 
1532
1533                /* 
1534                 * ok, we know up/down, so just send a arp out if there has
1535                 * been no activity for a while 
1536                 */
1537
1538                if (slave != NULL ) {
1539                        if ( ((jiffies - slave->dev->trans_start) >= 
1540                               the_delta_in_ticks) || 
1541                             ((jiffies - slave->dev->last_rx) >= 
1542                               the_delta_in_ticks) ) {
1543                                arp_send(ARPOP_REQUEST, ETH_P_ARP, 
1544                                         arp_target, slave->dev,
1545                                         my_ip, arp_target_hw_addr, 
1546                                         slave->dev->dev_addr, 
1547                                         arp_target_hw_addr); 
1548                        }
1549                } 
1550
1551        }
1552
1553        /* if we have no current slave.. try sending 
1554         * an arp on all of the interfaces 
1555         */
1556
1557        read_lock(&bond->ptrlock);
1558        if (bond->current_slave == NULL) { 
1559                read_unlock(&bond->ptrlock);
1560                slave = (slave_t *)bond;
1561                while ((slave = slave->prev) != (slave_t *)bond)   {
1562                        arp_send(ARPOP_REQUEST, ETH_P_ARP, arp_target, 
1563                                 slave->dev, my_ip, arp_target_hw_addr, 
1564                                 slave->dev->dev_addr, arp_target_hw_addr); 
1565                }
1566        }
1567        else {
1568                read_unlock(&bond->ptrlock);
1569        }
1570
1571        rtnl_exunlock();
1572        rtnl_shunlock();
1573
1574arp_monitor_out:
1575        read_unlock_irqrestore(&bond->lock, flags);
1576
1577        /* re-arm the timer */
1578        mod_timer(&bond->arp_timer, next_timer);
1579}
1580
#define isdigit(c) ((c) >= '0' && (c) <= '9')

/*
 * Parse an unsigned decimal number starting at *s.
 *
 * *s is advanced past every digit consumed, so on return it points at
 * the first non-digit character.  Returns the parsed value, or 0 when
 * *s does not start with a digit (in which case *s is left untouched).
 * No overflow checking is performed.
 */
__inline static int atoi( char **s) 
{
        int i = 0;

        while (isdigit(**s)) {
                /* decimal: each new digit shifts the accumulator by 10 */
                i = i*10 + *((*s)++) - '0';
        }
        return i;
}
1589
#define isascii(c) (((unsigned char)(c))<=0x7f)
#define LF 0xA
#define isspace(c) ((c) == ' ' || (c) == '\t' || (c) == LF)
typedef uint32_t in_addr_t;

/*
 * Convert a dotted IPv4 address string into its network-byte-order value.
 *
 * Accepted forms (every part is decimal only):
 *      a.b.c.d
 *      a.b.c   (with c treated as 16 bits)
 *      a.b     (with b treated as 24 bits)
 *      a       (treated as 32 bits)
 *
 * The number may be followed by the end of the string or by a single
 * ASCII space, tab or line feed; any other trailing character is
 * rejected.
 *
 * cp:       NUL-terminated string to parse.
 * the_addr: if non-NULL, receives the address in network byte order.
 *
 * Returns 1 on success and 0 if the string is not a valid address.
 */
int
my_inet_aton(char *cp, unsigned long *the_addr) {
        static const in_addr_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff };
        in_addr_t val;
        in_addr_t d;
        char c;
        union iaddr {
          uint8_t bytes[4];
          uint32_t word;
        } res;
        uint8_t *pp = res.bytes;

        res.word = 0;

        c = *cp;
        for (;;) {
                /*
                 * Collect a decimal number up to ``.''.  Every part
                 * must start with a digit.
                 */
                if (c < '0' || c > '9') {
                        goto ret_0;
                }
                val = 0;
                while (c >= '0' && c <= '9') {
                        d = (in_addr_t)(c - '0');
                        /* refuse input that would wrap the 32-bit value */
                        if (val > (0xffffffffU - d) / 10) {
                                goto ret_0;
                        }
                        val = (val * 10) + d;
                        c = *++cp;
                }
                if (c != '.') {
                        break;
                }
                /*
                 * A dot ends a leading part: at most three of those
                 * are allowed and each must fit in one byte.
                 */
                if (pp > res.bytes + 2 || val > 0xff) {
                        goto ret_0;
                }
                *pp++ = val;
                c = *++cp;
        }
        /*
         * Check for trailing characters.
         */
        if (c != '\0' && (!isascii(c) || !isspace(c))) {
                goto ret_0;
        }

        /* Check whether the last part is in its limits depending on
           the number of parts in total.  */
        if (val > max[pp - res.bytes]) {
                goto ret_0;
        }

        if (the_addr != NULL) {
                *the_addr = res.word | htonl (val);
        }

        return (1);

ret_0:
        return (0);
}
1670
1671static int bond_sethwaddr(struct net_device *master, struct net_device *slave)
1672{
1673#ifdef BONDING_DEBUG
1674        printk(KERN_CRIT "bond_sethwaddr: master=%x\n", (unsigned int)master);
1675        printk(KERN_CRIT "bond_sethwaddr: slave=%x\n", (unsigned int)slave);
1676        printk(KERN_CRIT "bond_sethwaddr: slave->addr_len=%d\n", slave->addr_len);
1677#endif
1678        memcpy(master->dev_addr, slave->dev_addr, slave->addr_len);
1679        return 0;
1680}
1681
1682static int bond_info_query(struct net_device *master, struct ifbond *info)
1683{
1684        bonding_t *bond = (struct bonding *) master->priv;
1685        slave_t *slave;
1686        unsigned long flags;
1687
1688        info->bond_mode = mode;
1689        info->num_slaves = 0;
1690        info->miimon = miimon;
1691
1692        read_lock_irqsave(&bond->lock, flags);
1693        for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) {
1694                info->num_slaves++;
1695        }
1696        read_unlock_irqrestore(&bond->lock, flags);
1697
1698        return 0;
1699}
1700
1701static int bond_slave_info_query(struct net_device *master, 
1702                                        struct ifslave *info)
1703{
1704        bonding_t *bond = (struct bonding *) master->priv;
1705        slave_t *slave;
1706        int cur_ndx = 0;
1707        unsigned long flags;
1708
1709        if (info->slave_id < 0) {
1710                return -ENODEV;
1711        }
1712
1713        read_lock_irqsave(&bond->lock, flags);
1714        for (slave = bond->prev; 
1715                 slave != (slave_t *)bond && cur_ndx < info->slave_id; 
1716                 slave = slave->prev) {
1717                cur_ndx++;
1718        }
1719        read_unlock_irqrestore(&bond->lock, flags);
1720
1721        if (cur_ndx == info->slave_id) {
1722                strcpy(info->slave_name, slave->dev->name);
1723                info->link = slave->link;
1724                info->state = slave->state;
1725                info->link_failure_count = slave->link_failure_count;
1726        } else {
1727                return -ENODEV;
1728        }
1729
1730        return 0;
1731}
1732
1733static int bond_ioctl(struct net_device *master_dev, struct ifreq *ifr, int cmd)
1734{
1735        struct net_device *slave_dev = NULL;
1736        struct ifbond *u_binfo = NULL, k_binfo;
1737        struct ifslave *u_sinfo = NULL, k_sinfo;
1738        u16 *data = NULL;
1739        int ret = 0;
1740
1741#ifdef BONDING_DEBUG
1742        printk(KERN_INFO "bond_ioctl: master=%s, cmd=%d\n", 
1743                master_dev->name, cmd);
1744#endif
1745
1746        switch (cmd) {
1747        case SIOCGMIIPHY:
1748                data = (u16 *)ifr->ifr_data;
1749                if (data == NULL) {
1750                        return -EINVAL;
1751                }
1752                data[0] = 0;
1753                /* Fall Through */
1754        case SIOCGMIIREG:
1755                /* 
1756                 * We do this again just in case we were called by SIOCGMIIREG
1757                 * instead of SIOCGMIIPHY.
1758                 */
1759                data = (u16 *)ifr->ifr_data;
1760                if (data == NULL) {
1761                        return -EINVAL;
1762                }
1763                if (data[1] == 1) {
1764                        data[3] = bond_check_mii_link(
1765                                (struct bonding *)master_dev->priv);
1766                }
1767                return 0;
1768        case BOND_INFO_QUERY_OLD:
1769        case SIOCBONDINFOQUERY:
1770                u_binfo = (struct ifbond *)ifr->ifr_data;
1771                if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond))) {
1772                        return -EFAULT;
1773                }
1774                ret = bond_info_query(master_dev, &k_binfo);
1775                if (ret == 0) {
1776                        if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond))) {
1777                                return -EFAULT;
1778                        }
1779                }
1780                return ret;
1781        case BOND_SLAVE_INFO_QUERY_OLD:
1782        case SIOCBONDSLAVEINFOQUERY:
1783                u_sinfo = (struct ifslave *)ifr->ifr_data;
1784                if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave))) {
1785                        return -EFAULT;
1786                }
1787                ret = bond_slave_info_query(master_dev, &k_sinfo);
1788                if (ret == 0) {
1789                        if (copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave))) {
1790                                return -EFAULT;
1791                        }
1792                }
1793                return ret;
1794        }
1795
1796        if (!capable(CAP_NET_ADMIN)) {
1797                return -EPERM;
1798        }
1799
1800        slave_dev = dev_get_by_name(ifr->ifr_slave);
1801
1802#ifdef BONDING_DEBUG
1803        printk(KERN_INFO "slave_dev=%x: \n", (unsigned int)slave_dev);
1804        printk(KERN_INFO "slave_dev->name=%s: \n", slave_dev->name);
1805#endif
1806
1807        if (slave_dev == NULL) {
1808                ret = -ENODEV;
1809        } else {
1810                switch (cmd) {
1811                case BOND_ENSLAVE_OLD:
1812                case SIOCBONDENSLAVE:           
1813                        ret = bond_enslave(master_dev, slave_dev);
1814                        break;
1815                case BOND_RELEASE_OLD:                  
1816                case SIOCBONDRELEASE:   
1817                        ret = bond_release(master_dev, slave_dev); 
1818                        break;
1819                case BOND_SETHWADDR_OLD:
1820                case SIOCBONDSETHWADDR: 
1821                        ret = bond_sethwaddr(master_dev, slave_dev);
1822                        break;
1823                case BOND_CHANGE_ACTIVE_OLD:
1824                case SIOCBONDCHANGEACTIVE:
1825                        if (mode == BOND_MODE_ACTIVEBACKUP) {
1826                                ret = bond_change_active(master_dev, slave_dev);
1827                        }
1828                        else {
1829                                ret = -EINVAL;
1830                        }
1831                        break;
1832                default:
1833                        ret = -EOPNOTSUPP;
1834                }
1835                dev_put(slave_dev);
1836        }
1837        return ret;
1838}
1839
#ifdef CONFIG_NET_FASTROUTE
/*
 * accept_fastpath hook of the bonding device (only built with
 * CONFIG_NET_FASTROUTE).  Neither argument is examined; the result is
 * always -1.
 */
static int bond_accept_fastpath(struct net_device *dev, struct dst_entry *dst)
{
        const int result = -1;

        return result;
}
#endif
1846
1847static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *dev)
1848{
1849        slave_t *slave, *start_at;
1850        struct bonding *bond = (struct bonding *) dev->priv;
1851        unsigned long flags;
1852
1853        if (!IS_UP(dev)) { /* bond down */
1854                dev_kfree_skb(skb);
1855                return 0;
1856        }
1857
1858        read_lock_irqsave(&bond->lock, flags);
1859
1860        read_lock(&bond->ptrlock);
1861        slave = start_at = bond->current_slave;
1862        read_unlock(&bond->ptrlock);
1863
1864        if (slave == NULL) { /* we're at the root, get the first slave */
1865                /* no suitable interface, frame not sent */
1866                dev_kfree_skb(skb);
1867                read_unlock_irqrestore(&bond->lock, flags);
1868                return 0;
1869        }
1870
1871        do {
1872                if (IS_UP(slave->dev)
1873                    && (slave->link == BOND_LINK_UP)
1874                    && (slave->state == BOND_STATE_ACTIVE)) {
1875
1876                        skb->dev = slave->dev;
1877                        skb->priority = 1;
1878                        dev_queue_xmit(skb);
1879
1880                        write_lock(&bond->ptrlock);
1881                        bond->current_slave = slave->next;
1882                        write_unlock(&bond->ptrlock);
1883
1884                        read_unlock_irqrestore(&bond->lock, flags);
1885                        return 0;
1886                }
1887        } while ((slave = slave->next) != start_at);
1888
1889        /* no suitable interface, frame not sent */
1890        dev_kfree_skb(skb);
1891        read_unlock_irqrestore(&bond->lock, flags);
1892        return 0;
1893}
1894
1895/* 
1896 * in XOR mode, we determine the output device by performing xor on
1897 * the source and destination hw adresses.  If this device is not 
1898 * enabled, find the next slave following this xor slave. 
1899 */
1900static int bond_xmit_xor(struct sk_buff *skb, struct net_device *dev)
1901{
1902        slave_t *slave, *start_at;
1903        struct bonding *bond = (struct bonding *) dev->priv;
1904        unsigned long flags;
1905        struct ethhdr *data = (struct ethhdr *)skb->data;
1906        int slave_no;
1907
1908        if (!IS_UP(dev)) { /* bond down */
1909                dev_kfree_skb(skb);
1910                return 0;
1911        }
1912
1913        read_lock_irqsave(&bond->lock, flags);
1914        slave = bond->prev;
1915
1916        /* we're at the root, get the first slave */
1917        if ((slave == NULL) || (slave->dev == NULL)) { 
1918                /* no suitable interface, frame not sent */
1919                dev_kfree_skb(skb);
1920                read_unlock_irqrestore(&bond->lock, flags);
1921                return 0;
1922        }
1923
1924        slave_no = (data->h_dest[5]^slave->dev->dev_addr[5]) % bond->slave_cnt;
1925
1926        while ( (slave_no > 0) && (slave != (slave_t *)bond) ) {
1927                slave = slave->prev;
1928                slave_no--;
1929        } 
1930        start_at = slave;
1931
1932        do {
1933                if (IS_UP(slave->dev)
1934                    && (slave->link == BOND_LINK_UP)
1935                    && (slave->state == BOND_STATE_ACTIVE)) {
1936
1937                        skb->dev = slave->dev;
1938                        skb->priority = 1;
1939                        dev_queue_xmit(skb);
1940
1941                        read_unlock_irqrestore(&bond->lock, flags);
1942                        return 0;
1943                }
1944        } while ((slave = slave->next) != start_at);
1945
1946        /* no suitable interface, frame not sent */
1947        dev_kfree_skb(skb);
1948        read_unlock_irqrestore(&bond->lock, flags);
1949        return 0;
1950}
1951
1952/* 
1953 * in active-backup mode, we know that bond->current_slave is always valid if
1954 * the bond has a usable interface.
1955 */
1956static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *dev)
1957{
1958        struct bonding *bond = (struct bonding *) dev->priv;
1959        unsigned long flags;
1960        int ret;
1961
1962        if (!IS_UP(dev)) { /* bond down */
1963                dev_kfree_skb(skb);
1964                return 0;
1965        }
1966
1967        /* if we are sending arp packets, try to at least 
1968           identify our own ip address */
1969        if ( (arp_interval > 0) && (my_ip == 0) &&
1970                (skb->protocol == __constant_htons(ETH_P_ARP) ) ) {
1971                char *the_ip = (((char *)skb->data)) 
1972                                + sizeof(struct ethhdr)  
1973                                + sizeof(struct arphdr) + 
1974                                ETH_ALEN;
1975                memcpy(&my_ip, the_ip, 4);
1976        }
1977
1978        /* if we are sending arp packets and don't know 
1979           the target hw address, save it so we don't need 
1980           to use a broadcast address */
1981        if ( (arp_interval > 0) && (arp_target_hw_addr == NULL) &&
1982             (skb->protocol == __constant_htons(ETH_P_IP) ) ) {
1983                struct ethhdr *eth_hdr = 
1984                        (struct ethhdr *) (((char *)skb->data));
1985                arp_target_hw_addr = kmalloc(ETH_ALEN, GFP_KERNEL);
1986                memcpy(arp_target_hw_addr, eth_hdr->h_dest, ETH_ALEN);
1987        }
1988
1989        read_lock_irqsave(&bond->lock, flags);
1990
1991        read_lock(&bond->ptrlock);
1992        if (bond->current_slave != NULL) { /* one usable interface */
1993                skb->dev = bond->current_slave->dev;
1994                read_unlock(&bond->ptrlock);
1995                skb->priority = 1;
1996                ret = dev_queue_xmit(skb);
1997                read_unlock_irqrestore(&bond->lock, flags);
1998                return 0;
1999        }
2000        else {
2001                read_unlock(&bond->ptrlock);
2002        }
2003
2004        /* no suitable interface, frame not sent */
2005#ifdef BONDING_DEBUG
2006        printk(KERN_INFO "There was no suitable interface, so we don't transmit\n");
2007#endif
2008        dev_kfree_skb(skb);
2009        read_unlock_irqrestore(&bond->lock, flags);
2010        return 0;
2011}
2012
2013static struct net_device_stats *bond_get_stats(struct net_device *dev)
2014{
2015        bonding_t *bond = dev->priv;
2016        struct net_device_stats *stats = bond->stats, *sstats;
2017        slave_t *slave;
2018        unsigned long flags;
2019
2020        memset(bond->stats, 0, sizeof(struct net_device_stats));
2021
2022        read_lock_irqsave(&bond->lock, flags);
2023
2024        for (slave = bond->prev; slave != (slave_t *)bond; slave = slave->prev) {
2025                sstats = slave->dev->get_stats(slave->dev);
2026 
2027                stats->rx_packets += sstats->rx_packets;
2028                stats->rx_bytes += sstats->rx_bytes;
2029                stats->rx_errors += sstats->rx_errors;
2030                stats->rx_dropped += sstats->rx_dropped;
2031
2032                stats->tx_packets += sstats->tx_packets;
2033                stats->tx_bytes += sstats->tx_bytes;
2034                stats->tx_errors += sstats->tx_errors;
2035                stats->tx_dropped += sstats->tx_dropped;
2036
2037                stats->multicast += sstats->multicast;
2038                stats->collisions += sstats->collisions;
2039
2040                stats->rx_length_errors += sstats->rx_length_errors;
2041                stats->rx_over_errors += sstats->rx_over_errors;
2042                stats->rx_crc_errors += sstats->rx_crc_errors;
2043                stats->rx_frame_errors += sstats->rx_frame_errors;
2044                stats->rx_fifo_errors += sstats->rx_fifo_errors;        
2045                stats->rx_missed_errors += sstats->rx_missed_errors;
2046        
2047                stats->tx_aborted_errors += sstats->tx_aborted_errors;
2048                stats->tx_carrier_errors += sstats->tx_carrier_errors;
2049                stats->tx_fifo_errors += sstats->tx_fifo_errors;
2050                stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
2051                stats->tx_window_errors += sstats->tx_window_errors;
2052
2053        }
2054
2055        read_unlock_irqrestore(&bond->lock, flags);
2056        return stats;
2057}
2058
2059static int bond_get_info(char *buf, char **start, off_t offset, int length)
2060{
2061        bonding_t *bond = these_bonds;
2062        int len = 0;
2063        off_t begin = 0;
2064        u16 link;
2065        slave_t *slave = NULL;
2066        unsigned long flags;
2067
2068        while (bond != NULL) {
2069                /*
2070                 * This function locks the mutex, so we can't lock it until 
2071                 * afterwards
2072                 */
2073                link = bond_check_mii_link(bond);
2074
2075                len += sprintf(buf + len, "Bonding Mode: ");
2076
2077                switch (mode) {
2078                        case BOND_MODE_ACTIVEBACKUP:
2079                                len += sprintf(buf + len, "%s\n", 
2080                                                "active-backup");
2081                        break;
2082
2083                        case BOND_MODE_ROUNDROBIN:
2084                                len += sprintf(buf + len, "%s\n", 
2085                                                "load balancing (round-robin)");
2086                        break;
2087
2088                        case BOND_MODE_XOR:
2089                                len += sprintf(buf + len, "%s\n", 
2090                                                "load balancing (xor)");
2091                        break;
2092
2093                        default:
2094                                len += sprintf(buf + len, "%s\n", 
2095                                                "unknown");
2096                        break;
2097                }
2098
2099                if (mode == BOND_MODE_ACTIVEBACKUP) {
2100                        read_lock_irqsave(&bond->lock, flags);
2101                        read_lock(&bond->ptrlock);
2102                        if (bond->current_slave != NULL) {
2103                                len += sprintf(buf + len, 
2104                                        "Currently Active Slave: %s\n", 
2105                                        bond->current_slave->dev->name);
2106                        }
2107                        read_unlock(&bond->ptrlock);
2108                        read_unlock_irqrestore(&bond->lock, flags);
2109                }
2110
2111                len += sprintf(buf + len, "MII Status: ");
2112                len += sprintf(buf + len, 
2113                                link == MII_LINK_READY ? "up\n" : "down\n");
2114                len += sprintf(buf + len, "MII Polling Interval (ms): %d\n", 
2115                                miimon);
2116                len += sprintf(buf + len, "Up Delay (ms): %d\n", updelay);
2117                len += sprintf(buf + len, "Down Delay (ms): %d\n", downdelay);
2118
2119                read_lock_irqsave(&bond->lock, flags);
2120                for (slave = bond->prev; slave != (slave_t *)bond; 
2121                     slave = slave->prev) {
2122                        len += sprintf(buf + len, "\nSlave Interface: %s\n", slave->dev->name);
2123
2124                        len += sprintf(buf + len, "MII Status: ");
2125
2126                        len += sprintf(buf + len, 
2127                                slave->link == BOND_LINK_UP ? 
2128                                "up\n" : "down\n");
2129                        len += sprintf(buf + len, "Link Failure Count: %d\n", 
2130                                slave->link_failure_count);
2131                }
2132                read_unlock_irqrestore(&bond->lock, flags);
2133
2134                /*
2135                 * Figure out the calcs for the /proc/net interface
2136                 */
2137                *start = buf + (offset - begin);
2138                len -= (offset - begin);
2139                if (len > length) {
2140                        len = length;
2141                }
2142                if (len < 0) {
2143                        len = 0;
2144                }
2145
2146
2147                bond = bond->next_bond;
2148        }
2149        return len;
2150}
2151
2152static int bond_event(struct notifier_block *this, unsigned long event, 
2153                        void *ptr)
2154{
2155        struct bonding *this_bond = (struct bonding *)these_bonds;
2156        struct bonding *last_bond;
2157        struct net_device *event_dev = (struct net_device *)ptr;
2158
2159        /* while there are bonds configured */
2160        while (this_bond != NULL) {
2161                if (this_bond == event_dev->priv ) {
2162                        switch (event) {
2163                        case NETDEV_UNREGISTER:
2164                                /* 
2165                                 * remove this bond from a linked list of 
2166                                 * bonds 
2167                                 */
2168                                if (this_bond == these_bonds) {
2169                                        these_bonds = this_bond->next_bond;
2170                                } else {
2171                                        for (last_bond = these_bonds; 
2172                                             last_bond != NULL; 
2173                                             last_bond = last_bond->next_bond) {
2174                                                if (last_bond->next_bond == 
2175                                                    this_bond) {
2176                                                        last_bond->next_bond = 
2177                                                        this_bond->next_bond;
2178                                                }
2179                                        }
2180                                }
2181                                return NOTIFY_DONE;
2182
2183                        default:
2184                                return NOTIFY_DONE;
2185                        }
2186                } else if (this_bond->device == event_dev->master) {
2187                        switch (event) {
2188                        case NETDEV_UNREGISTER:
2189                                bond_release(this_bond->device, event_dev);
2190                                break;
2191                        }
2192                        return NOTIFY_DONE;
2193                }
2194                this_bond = this_bond->next_bond;
2195        }
2196        return NOTIFY_DONE;
2197}
2198
2199static struct notifier_block bond_netdev_notifier = {
2200        .notifier_call = bond_event,
2201};
2202
2203static int __init bond_init(struct net_device *dev)
2204{
2205        bonding_t *bond, *this_bond, *last_bond;
2206
2207#ifdef BONDING_DEBUG
2208        printk (KERN_INFO "Begin bond_init for %s\n", dev->name);
2209#endif
2210        bond = kmalloc(sizeof(struct bonding), GFP_KERNEL);
2211        if (bond == NULL) {
2212                return -ENOMEM;
2213        }
2214        memset(bond, 0, sizeof(struct bonding));
2215
2216        /* initialize rwlocks */
2217        rwlock_init(&bond->lock);
2218        rwlock_init(&bond->ptrlock);
2219        
2220        bond->stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
2221        if (bond->stats == NULL) {
2222                kfree(bond);
2223                return -ENOMEM;
2224        }
2225        memset(bond->stats, 0, sizeof(struct net_device_stats));
2226
2227        bond->next = bond->prev = (slave_t *)bond;
2228        bond->current_slave = NULL;
2229        bond->device = dev;
2230        dev->priv = bond;
2231
2232        /* Initialize the device structure. */
2233        if (mode == BOND_MODE_ACTIVEBACKUP) {
2234                dev->hard_start_xmit = bond_xmit_activebackup;
2235        } else if (mode == BOND_MODE_ROUNDROBIN) {
2236                dev->hard_start_xmit = bond_xmit_roundrobin;
2237        } else if (mode == BOND_MODE_XOR) {
2238                dev->hard_start_xmit = bond_xmit_xor;
2239        } else {
2240                printk(KERN_ERR "Unknown bonding mode %d\n", mode);
2241                kfree(bond->stats);
2242                kfree(bond);
2243                return -EINVAL;
2244        }
2245
2246        dev->get_stats = bond_get_stats;
2247        dev->open = bond_open;
2248        dev->stop = bond_close;
2249        dev->set_multicast_list = set_multicast_list;
2250        dev->do_ioctl = bond_ioctl;
2251
2252        /* 
2253         * Fill in the fields of the device structure with ethernet-generic 
2254         * values. 
2255         */
2256
2257        ether_setup(dev);
2258
2259        dev->tx_queue_len = 0;
2260        dev->flags |= IFF_MASTER|IFF_MULTICAST;
2261#ifdef CONFIG_NET_FASTROUTE
2262        dev->accept_fastpath = bond_accept_fastpath;
2263#endif
2264
2265        printk(KERN_INFO "%s registered with", dev->name);
2266        if (miimon > 0) {
2267                printk(" MII link monitoring set to %d ms", miimon);
2268                updelay /= miimon;
2269                downdelay /= miimon;
2270        } else {
2271                printk("out MII link monitoring");
2272        }
2273        printk(", in %s mode.\n",mode?"active-backup":"bonding");
2274
2275#ifdef CONFIG_PROC_FS
2276        bond->bond_proc_dir = proc_mkdir(dev->name, proc_net);
2277        if (bond->bond_proc_dir == NULL) {
2278                printk(KERN_ERR "%s: Cannot init /proc/net/%s/\n", 
2279                        dev->name, dev->name);
2280                kfree(bond->stats);
2281                kfree(bond);
2282                return -ENOMEM;
2283        }
2284        bond->bond_proc_info_file = 
2285                create_proc_info_entry("info", 0, bond->bond_proc_dir, 
2286                                        bond_get_info);
2287        if (bond->bond_proc_info_file == NULL) {
2288                printk(KERN_ERR "%s: Cannot init /proc/net/%s/info\n", 
2289                        dev->name, dev->name);
2290                remove_proc_entry(dev->name, proc_net);
2291                kfree(bond->stats);
2292                kfree(bond);
2293                return -ENOMEM;
2294        }
2295#endif /* CONFIG_PROC_FS */
2296
2297        if (first_pass == 1) {
2298                these_bonds = bond;
2299                register_netdevice_notifier(&bond_netdev_notifier);
2300                first_pass = 0;
2301        } else {
2302                last_bond = these_bonds;
2303                this_bond = these_bonds->next_bond;
2304                while (this_bond != NULL) {
2305                        last_bond = this_bond;
2306                        this_bond = this_bond->next_bond;
2307                }
2308                last_bond->next_bond = bond;
2309        } 
2310
2311        return 0;
2312}
2313
2314/*
2315static int __init bond_probe(struct net_device *dev)
2316{
2317        bond_init(dev);
2318        return 0;
2319}
2320 */
2321
2322static int __init bonding_init(void)
2323{
2324        int no;
2325        int err;
2326
2327        /* Find a name for this unit */
2328        static struct net_device *dev_bond = NULL;
2329
2330        if (max_bonds < 1 || max_bonds > INT_MAX) {
2331                printk(KERN_WARNING 
2332                       "bonding_init(): max_bonds (%d) not in range %d-%d, "
2333                       "so it was reset to BOND_DEFAULT_MAX_BONDS (%d)",
2334                       max_bonds, 1, INT_MAX, BOND_DEFAULT_MAX_BONDS);
2335                max_bonds = BOND_DEFAULT_MAX_BONDS;
2336        }
2337        dev_bond = dev_bonds = kmalloc(max_bonds*sizeof(struct net_device), 
2338                                        GFP_KERNEL);
2339        if (dev_bond == NULL) {
2340                return -ENOMEM;
2341        }
2342        memset(dev_bonds, 0, max_bonds*sizeof(struct net_device));
2343
2344        if (updelay < 0) {
2345                printk(KERN_WARNING 
2346                       "bonding_init(): updelay module parameter (%d), "
2347                       "not in range 0-%d, so it was reset to 0\n",
2348                       updelay, INT_MAX);
2349                updelay = 0;
2350        }
2351
2352        if (downdelay < 0) {
2353                printk(KERN_WARNING 
2354                       "bonding_init(): downdelay module parameter (%d), "
2355                       "not in range 0-%d, so it was reset to 0\n",
2356                       downdelay, INT_MAX);
2357                downdelay = 0;
2358        }
2359
2360        if (arp_interval < 0) {
2361                printk(KERN_WARNING 
2362                       "bonding_init(): arp_interval module parameter (%d), "
2363                       "not in range 0-%d, so it was reset to %d\n",
2364                       arp_interval, INT_MAX, BOND_LINK_ARP_INTERV);
2365                arp_interval = BOND_LINK_ARP_INTERV;
2366        }
2367
2368        if (arp_ip_target) {
2369                /* TODO: check and log bad ip address */
2370                if (my_inet_aton(arp_ip_target, &arp_target) == 0)  {
2371                        arp_interval = 0;
2372                }
2373        }
2374
2375        for (no = 0; no < max_bonds; no++) {
2376                dev_bond->init = bond_init;
2377        
2378                err = dev_alloc_name(dev_bond,"bond%d");
2379                if (err < 0) {
2380                        kfree(dev_bonds);
2381                        return err;
2382                }
2383                SET_MODULE_OWNER(dev_bond);
2384                if (register_netdev(dev_bond) != 0) {
2385                        kfree(dev_bonds);
2386                        return -EIO;
2387                }       
2388                dev_bond++;
2389        }
2390        return 0;
2391}
2392
2393static void __exit bonding_exit(void)
2394{
2395        struct net_device *dev_bond = dev_bonds;
2396        struct bonding *bond;
2397        int no;
2398
2399        unregister_netdevice_notifier(&bond_netdev_notifier);
2400                 
2401        for (no = 0; no < max_bonds; no++) {
2402
2403#ifdef CONFIG_PROC_FS
2404                bond = (struct bonding *) dev_bond->priv;
2405                remove_proc_entry("info", bond->bond_proc_dir);
2406                remove_proc_entry(dev_bond->name, proc_net);
2407#endif
2408                unregister_netdev(dev_bond);
2409                kfree(bond->stats);
2410                kfree(dev_bond->priv);
2411                
2412                dev_bond->priv = NULL;
2413                dev_bond++;
2414        }
2415        kfree(dev_bonds);
2416}
2417
/* Module entry points: bonding_init() on load, bonding_exit() on unload. */
2418module_init(bonding_init);
2419module_exit(bonding_exit);
/* Declare the module's license as GPL. */
2420MODULE_LICENSE("GPL");
2421
2422/*
2423 * Local variables:
2424 *  c-indent-level: 8
2425 *  c-basic-offset: 8
2426 *  tab-width: 8
2427 * End:
2428 */
2429
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.