linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/errno.h>
  16#include <linux/if_arp.h>
  17#include <linux/netdevice.h>
  18#include <linux/init.h>
  19#include <linux/skbuff.h>
  20#include <linux/moduleparam.h>
  21#include <net/dst.h>
  22#include <net/neighbour.h>
  23#include <net/pkt_sched.h>
  24
  25/*
  26   How to set it up.
  27   ----------------
  28
  29   After loading this module you will find a new device teqlN
  30   and new qdisc with the same name. To join a slave to the equalizer
  31   you should just set this qdisc on a device f.e.
  32
  33   # tc qdisc add dev eth0 root teql0
  34   # tc qdisc add dev eth1 root teql0
  35
  36   That's all. Full PnP 8)
  37
  38   Applicability.
  39   --------------
  40
  41   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  42      signal and generate EOI events. If you want to equalize virtual devices
  43      like tunnels, use a normal eql device.
  44   2. This device puts no limitations on physical slave characteristics
  45      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  46      Certainly, large difference in link speeds will make the resulting
  47      equalized link unusable, because of huge packet reordering.
  48      I estimate an upper useful difference as ~10 times.
  49   3. If the slave requires address resolution, only protocols using
  50      neighbour cache (IPv4/IPv6) will work over the equalized link.
  51      Other protocols are still allowed to use the slave device directly,
  52      which will not break load balancing, though native slave
  53      traffic will have the highest priority.  */
  54
  55struct teql_master
  56{
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61};
  62
  63struct teql_sched_data
  64{
  65        struct Qdisc *next;
  66        struct teql_master *m;
  67        struct neighbour *ncache;
  68        struct sk_buff_head q;
  69};
  70
  71#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
  72
  73#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
  74
  75/* "teql*" qdisc routines */
  76
  77static int
  78teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  79{
  80        struct net_device *dev = qdisc_dev(sch);
  81        struct teql_sched_data *q = qdisc_priv(sch);
  82
  83        if (q->q.qlen < dev->tx_queue_len) {
  84                __skb_queue_tail(&q->q, skb);
  85                sch->bstats.bytes += qdisc_pkt_len(skb);
  86                sch->bstats.packets++;
  87                return 0;
  88        }
  89
  90        kfree_skb(skb);
  91        sch->qstats.drops++;
  92        return NET_XMIT_DROP;
  93}
  94
  95static struct sk_buff *
  96teql_dequeue(struct Qdisc* sch)
  97{
  98        struct teql_sched_data *dat = qdisc_priv(sch);
  99        struct netdev_queue *dat_queue;
 100        struct sk_buff *skb;
 101
 102        skb = __skb_dequeue(&dat->q);
 103        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 104        if (skb == NULL) {
 105                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 106                if (m) {
 107                        dat->m->slaves = sch;
 108                        netif_wake_queue(m);
 109                }
 110        }
 111        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 112        return skb;
 113}
 114
 115static struct sk_buff *
 116teql_peek(struct Qdisc* sch)
 117{
 118        /* teql is meant to be used as root qdisc */
 119        return NULL;
 120}
 121
 122static __inline__ void
 123teql_neigh_release(struct neighbour *n)
 124{
 125        if (n)
 126                neigh_release(n);
 127}
 128
 129static void
 130teql_reset(struct Qdisc* sch)
 131{
 132        struct teql_sched_data *dat = qdisc_priv(sch);
 133
 134        skb_queue_purge(&dat->q);
 135        sch->q.qlen = 0;
 136        teql_neigh_release(xchg(&dat->ncache, NULL));
 137}
 138
 139static void
 140teql_destroy(struct Qdisc* sch)
 141{
 142        struct Qdisc *q, *prev;
 143        struct teql_sched_data *dat = qdisc_priv(sch);
 144        struct teql_master *master = dat->m;
 145
 146        if ((prev = master->slaves) != NULL) {
 147                do {
 148                        q = NEXT_SLAVE(prev);
 149                        if (q == sch) {
 150                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 151                                if (q == master->slaves) {
 152                                        master->slaves = NEXT_SLAVE(q);
 153                                        if (q == master->slaves) {
 154                                                struct netdev_queue *txq;
 155                                                spinlock_t *root_lock;
 156
 157                                                txq = netdev_get_tx_queue(master->dev, 0);
 158                                                master->slaves = NULL;
 159
 160                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 161                                                spin_lock_bh(root_lock);
 162                                                qdisc_reset(txq->qdisc);
 163                                                spin_unlock_bh(root_lock);
 164                                        }
 165                                }
 166                                skb_queue_purge(&dat->q);
 167                                teql_neigh_release(xchg(&dat->ncache, NULL));
 168                                break;
 169                        }
 170
 171                } while ((prev = q) != master->slaves);
 172        }
 173}
 174
 175static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 176{
 177        struct net_device *dev = qdisc_dev(sch);
 178        struct teql_master *m = (struct teql_master*)sch->ops;
 179        struct teql_sched_data *q = qdisc_priv(sch);
 180
 181        if (dev->hard_header_len > m->dev->hard_header_len)
 182                return -EINVAL;
 183
 184        if (m->dev == dev)
 185                return -ELOOP;
 186
 187        q->m = m;
 188
 189        skb_queue_head_init(&q->q);
 190
 191        if (m->slaves) {
 192                if (m->dev->flags & IFF_UP) {
 193                        if ((m->dev->flags & IFF_POINTOPOINT &&
 194                             !(dev->flags & IFF_POINTOPOINT)) ||
 195                            (m->dev->flags & IFF_BROADCAST &&
 196                             !(dev->flags & IFF_BROADCAST)) ||
 197                            (m->dev->flags & IFF_MULTICAST &&
 198                             !(dev->flags & IFF_MULTICAST)) ||
 199                            dev->mtu < m->dev->mtu)
 200                                return -EINVAL;
 201                } else {
 202                        if (!(dev->flags&IFF_POINTOPOINT))
 203                                m->dev->flags &= ~IFF_POINTOPOINT;
 204                        if (!(dev->flags&IFF_BROADCAST))
 205                                m->dev->flags &= ~IFF_BROADCAST;
 206                        if (!(dev->flags&IFF_MULTICAST))
 207                                m->dev->flags &= ~IFF_MULTICAST;
 208                        if (dev->mtu < m->dev->mtu)
 209                                m->dev->mtu = dev->mtu;
 210                }
 211                q->next = NEXT_SLAVE(m->slaves);
 212                NEXT_SLAVE(m->slaves) = sch;
 213        } else {
 214                q->next = sch;
 215                m->slaves = sch;
 216                m->dev->mtu = dev->mtu;
 217                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 218        }
 219        return 0;
 220}
 221
 222
 223static int
 224__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 225{
 226        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 227        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
 228        struct neighbour *mn = skb_dst(skb)->neighbour;
 229        struct neighbour *n = q->ncache;
 230
 231        if (mn->tbl == NULL)
 232                return -EINVAL;
 233        if (n && n->tbl == mn->tbl &&
 234            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
 235                atomic_inc(&n->refcnt);
 236        } else {
 237                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
 238                if (IS_ERR(n))
 239                        return PTR_ERR(n);
 240        }
 241        if (neigh_event_send(n, skb_res) == 0) {
 242                int err;
 243
 244                read_lock(&n->lock);
 245                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 246                                      n->ha, NULL, skb->len);
 247                read_unlock(&n->lock);
 248
 249                if (err < 0) {
 250                        neigh_release(n);
 251                        return -EINVAL;
 252                }
 253                teql_neigh_release(xchg(&q->ncache, n));
 254                return 0;
 255        }
 256        neigh_release(n);
 257        return (skb_res == NULL) ? -EAGAIN : 1;
 258}
 259
 260static inline int teql_resolve(struct sk_buff *skb,
 261                               struct sk_buff *skb_res, struct net_device *dev)
 262{
 263        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 264        if (txq->qdisc == &noop_qdisc)
 265                return -ENODEV;
 266
 267        if (dev->header_ops == NULL ||
 268            skb_dst(skb) == NULL ||
 269            skb_dst(skb)->neighbour == NULL)
 270                return 0;
 271        return __teql_resolve(skb, skb_res, dev);
 272}
 273
 274static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 275{
 276        struct teql_master *master = netdev_priv(dev);
 277        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 278        struct Qdisc *start, *q;
 279        int busy;
 280        int nores;
 281        int subq = skb_get_queue_mapping(skb);
 282        struct sk_buff *skb_res = NULL;
 283
 284        start = master->slaves;
 285
 286restart:
 287        nores = 0;
 288        busy = 0;
 289
 290        if ((q = start) == NULL)
 291                goto drop;
 292
 293        do {
 294                struct net_device *slave = qdisc_dev(q);
 295                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 296                const struct net_device_ops *slave_ops = slave->netdev_ops;
 297
 298                if (slave_txq->qdisc_sleeping != q)
 299                        continue;
 300                if (__netif_subqueue_stopped(slave, subq) ||
 301                    !netif_running(slave)) {
 302                        busy = 1;
 303                        continue;
 304                }
 305
 306                switch (teql_resolve(skb, skb_res, slave)) {
 307                case 0:
 308                        if (__netif_tx_trylock(slave_txq)) {
 309                                unsigned int length = qdisc_pkt_len(skb);
 310
 311                                if (!netif_tx_queue_stopped(slave_txq) &&
 312                                    !netif_tx_queue_frozen(slave_txq) &&
 313                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 314                                        txq_trans_update(slave_txq);
 315                                        __netif_tx_unlock(slave_txq);
 316                                        master->slaves = NEXT_SLAVE(q);
 317                                        netif_wake_queue(dev);
 318                                        txq->tx_packets++;
 319                                        txq->tx_bytes += length;
 320                                        return NETDEV_TX_OK;
 321                                }
 322                                __netif_tx_unlock(slave_txq);
 323                        }
 324                        if (netif_queue_stopped(dev))
 325                                busy = 1;
 326                        break;
 327                case 1:
 328                        master->slaves = NEXT_SLAVE(q);
 329                        return NETDEV_TX_OK;
 330                default:
 331                        nores = 1;
 332                        break;
 333                }
 334                __skb_pull(skb, skb_network_offset(skb));
 335        } while ((q = NEXT_SLAVE(q)) != start);
 336
 337        if (nores && skb_res == NULL) {
 338                skb_res = skb;
 339                goto restart;
 340        }
 341
 342        if (busy) {
 343                netif_stop_queue(dev);
 344                return NETDEV_TX_BUSY;
 345        }
 346        dev->stats.tx_errors++;
 347
 348drop:
 349        txq->tx_dropped++;
 350        dev_kfree_skb(skb);
 351        return NETDEV_TX_OK;
 352}
 353
 354static int teql_master_open(struct net_device *dev)
 355{
 356        struct Qdisc * q;
 357        struct teql_master *m = netdev_priv(dev);
 358        int mtu = 0xFFFE;
 359        unsigned flags = IFF_NOARP|IFF_MULTICAST;
 360
 361        if (m->slaves == NULL)
 362                return -EUNATCH;
 363
 364        flags = FMASK;
 365
 366        q = m->slaves;
 367        do {
 368                struct net_device *slave = qdisc_dev(q);
 369
 370                if (slave == NULL)
 371                        return -EUNATCH;
 372
 373                if (slave->mtu < mtu)
 374                        mtu = slave->mtu;
 375                if (slave->hard_header_len > LL_MAX_HEADER)
 376                        return -EINVAL;
 377
 378                /* If all the slaves are BROADCAST, master is BROADCAST
 379                   If all the slaves are PtP, master is PtP
 380                   Otherwise, master is NBMA.
 381                 */
 382                if (!(slave->flags&IFF_POINTOPOINT))
 383                        flags &= ~IFF_POINTOPOINT;
 384                if (!(slave->flags&IFF_BROADCAST))
 385                        flags &= ~IFF_BROADCAST;
 386                if (!(slave->flags&IFF_MULTICAST))
 387                        flags &= ~IFF_MULTICAST;
 388        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 389
 390        m->dev->mtu = mtu;
 391        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 392        netif_start_queue(m->dev);
 393        return 0;
 394}
 395
 396static int teql_master_close(struct net_device *dev)
 397{
 398        netif_stop_queue(dev);
 399        return 0;
 400}
 401
 402static int teql_master_mtu(struct net_device *dev, int new_mtu)
 403{
 404        struct teql_master *m = netdev_priv(dev);
 405        struct Qdisc *q;
 406
 407        if (new_mtu < 68)
 408                return -EINVAL;
 409
 410        q = m->slaves;
 411        if (q) {
 412                do {
 413                        if (new_mtu > qdisc_dev(q)->mtu)
 414                                return -EINVAL;
 415                } while ((q=NEXT_SLAVE(q)) != m->slaves);
 416        }
 417
 418        dev->mtu = new_mtu;
 419        return 0;
 420}
 421
 422static const struct net_device_ops teql_netdev_ops = {
 423        .ndo_open       = teql_master_open,
 424        .ndo_stop       = teql_master_close,
 425        .ndo_start_xmit = teql_master_xmit,
 426        .ndo_change_mtu = teql_master_mtu,
 427};
 428
 429static __init void teql_master_setup(struct net_device *dev)
 430{
 431        struct teql_master *master = netdev_priv(dev);
 432        struct Qdisc_ops *ops = &master->qops;
 433
 434        master->dev     = dev;
 435        ops->priv_size  = sizeof(struct teql_sched_data);
 436
 437        ops->enqueue    =       teql_enqueue;
 438        ops->dequeue    =       teql_dequeue;
 439        ops->peek       =       teql_peek;
 440        ops->init       =       teql_qdisc_init;
 441        ops->reset      =       teql_reset;
 442        ops->destroy    =       teql_destroy;
 443        ops->owner      =       THIS_MODULE;
 444
 445        dev->netdev_ops =       &teql_netdev_ops;
 446        dev->type               = ARPHRD_VOID;
 447        dev->mtu                = 1500;
 448        dev->tx_queue_len       = 100;
 449        dev->flags              = IFF_NOARP;
 450        dev->hard_header_len    = LL_MAX_HEADER;
 451}
 452
 453static LIST_HEAD(master_dev_list);
 454static int max_equalizers = 1;
 455module_param(max_equalizers, int, 0);
 456MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 457
 458static int __init teql_init(void)
 459{
 460        int i;
 461        int err = -ENODEV;
 462
 463        for (i = 0; i < max_equalizers; i++) {
 464                struct net_device *dev;
 465                struct teql_master *master;
 466
 467                dev = alloc_netdev(sizeof(struct teql_master),
 468                                  "teql%d", teql_master_setup);
 469                if (!dev) {
 470                        err = -ENOMEM;
 471                        break;
 472                }
 473
 474                if ((err = register_netdev(dev))) {
 475                        free_netdev(dev);
 476                        break;
 477                }
 478
 479                master = netdev_priv(dev);
 480
 481                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 482                err = register_qdisc(&master->qops);
 483
 484                if (err) {
 485                        unregister_netdev(dev);
 486                        free_netdev(dev);
 487                        break;
 488                }
 489
 490                list_add_tail(&master->master_list, &master_dev_list);
 491        }
 492        return i ? 0 : err;
 493}
 494
 495static void __exit teql_exit(void)
 496{
 497        struct teql_master *master, *nxt;
 498
 499        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 500
 501                list_del(&master->master_list);
 502
 503                unregister_qdisc(&master->qops);
 504                unregister_netdev(master->dev);
 505                free_netdev(master->dev);
 506        }
 507}
 508
 509module_init(teql_init);
 510module_exit(teql_exit);
 511
 512MODULE_LICENSE("GPL");
 513
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.