/* linux/net/sched/sch_teql.c */
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
  26/*
  27   How to setup it.
  28   ----------------
  29
  30   After loading this module you will find a new device teqlN
  31   and new qdisc with the same name. To join a slave to the equalizer
  32   you should just set this qdisc on a device f.e.
  33
  34   # tc qdisc add dev eth0 root teql0
  35   # tc qdisc add dev eth1 root teql0
  36
  37   That's all. Full PnP 8)
  38
  39   Applicability.
  40   --------------
  41
  42   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  43      signal and generate EOI events. If you want to equalize virtual devices
  44      like tunnels, use a normal eql device.
  45   2. This device puts no limitations on physical slave characteristics
  46      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  47      Certainly, large difference in link speeds will make the resulting
   48      equalized link unusable, because of huge packet reordering.
  49      I estimate an upper useful difference as ~10 times.
  50   3. If the slave requires address resolution, only protocols using
  51      neighbour cache (IPv4/IPv6) will work over the equalized link.
  52      Other protocols are still allowed to use the slave device directly,
  53      which will not break load balancing, though native slave
  54      traffic will have the highest priority.  */
  55
/* One "teqlN" equalizer: the virtual master net_device plus the
 * qdisc_ops registered under the same name, and the head of the
 * circular list of slave qdiscs attached to it. */
struct teql_master
{
        struct Qdisc_ops qops;          /* per-master qdisc type; qops.id is set to dev->name */
        struct net_device *dev;         /* the teqlN virtual device */
        struct Qdisc *slaves;           /* circular slave list, threaded via NEXT_SLAVE() */
        struct list_head master_list;   /* link in the module-global master_dev_list */
};
  63
/* Per-slave qdisc private data (lives in qdisc_priv(sch)). */
struct teql_sched_data
{
        struct Qdisc *next;             /* next slave in the master's circular list */
        struct teql_master *m;          /* owning master; set only after init() passes its checks */
        struct neighbour *ncache;       /* one-entry neighbour cache for header resolution */
        struct sk_buff_head q;          /* packets queued on this slave */
};
  71
/* Step to the next slave qdisc in the master's circular list. */
#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

/* Link-type flags the master derives from the intersection of its slaves. */
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
  75
  76/* "teql*" qdisc routines */
  77
  78static int
  79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  80{
  81        struct net_device *dev = qdisc_dev(sch);
  82        struct teql_sched_data *q = qdisc_priv(sch);
  83
  84        if (q->q.qlen < dev->tx_queue_len) {
  85                __skb_queue_tail(&q->q, skb);
  86                sch->bstats.bytes += qdisc_pkt_len(skb);
  87                sch->bstats.packets++;
  88                return NET_XMIT_SUCCESS;
  89        }
  90
  91        kfree_skb(skb);
  92        sch->qstats.drops++;
  93        return NET_XMIT_DROP;
  94}
  95
  96static struct sk_buff *
  97teql_dequeue(struct Qdisc* sch)
  98{
  99        struct teql_sched_data *dat = qdisc_priv(sch);
 100        struct netdev_queue *dat_queue;
 101        struct sk_buff *skb;
 102
 103        skb = __skb_dequeue(&dat->q);
 104        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        }
 112        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 113        return skb;
 114}
 115
/* .peek is deliberately unimplemented: teql is meant to be the root
 * qdisc of a slave device, where peeking is never used. */
static struct sk_buff *
teql_peek(struct Qdisc* sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}
 122
 123static __inline__ void
 124teql_neigh_release(struct neighbour *n)
 125{
 126        if (n)
 127                neigh_release(n);
 128}
 129
/* Qdisc reset: flush every queued packet and invalidate the cached
 * neighbour entry. */
static void
teql_reset(struct Qdisc* sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
        sch->q.qlen = 0;
        /* Atomically detach the cached neighbour and release it. */
        teql_neigh_release(xchg(&dat->ncache, NULL));
}
 139
 140static void
 141teql_destroy(struct Qdisc* sch)
 142{
 143        struct Qdisc *q, *prev;
 144        struct teql_sched_data *dat = qdisc_priv(sch);
 145        struct teql_master *master = dat->m;
 146
 147        if ((prev = master->slaves) != NULL) {
 148                do {
 149                        q = NEXT_SLAVE(prev);
 150                        if (q == sch) {
 151                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 152                                if (q == master->slaves) {
 153                                        master->slaves = NEXT_SLAVE(q);
 154                                        if (q == master->slaves) {
 155                                                struct netdev_queue *txq;
 156                                                spinlock_t *root_lock;
 157
 158                                                txq = netdev_get_tx_queue(master->dev, 0);
 159                                                master->slaves = NULL;
 160
 161                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 162                                                spin_lock_bh(root_lock);
 163                                                qdisc_reset(txq->qdisc);
 164                                                spin_unlock_bh(root_lock);
 165                                        }
 166                                }
 167                                skb_queue_purge(&dat->q);
 168                                teql_neigh_release(xchg(&dat->ncache, NULL));
 169                                break;
 170                        }
 171
 172                } while ((prev = q) != master->slaves);
 173        }
 174}
 175
/* Qdisc init: attach a new slave device to the master identified by
 * sch->ops (each master registers its own qdisc_ops).
 * Returns -EINVAL for an incompatible slave, -ELOOP if the slave is
 * the master device itself, 0 on success. */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_master *m = (struct teql_master*)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        /* The master reserves hard_header_len bytes of headroom; a
         * slave needing more could not build its header there. */
        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        /* Attaching the master to itself would loop packets forever. */
        if (m->dev == dev)
                return -ELOOP;

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        /* Master is running: the new slave must not weaken
                         * the capabilities/MTU the master already advertises. */
                        if ((m->dev->flags & IFF_POINTOPOINT &&
                             !(dev->flags & IFF_POINTOPOINT)) ||
                            (m->dev->flags & IFF_BROADCAST &&
                             !(dev->flags & IFF_BROADCAST)) ||
                            (m->dev->flags & IFF_MULTICAST &&
                             !(dev->flags & IFF_MULTICAST)) ||
                            dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        /* Master is down: shrink its capabilities and MTU
                         * to the intersection with the new slave. */
                        if (!(dev->flags&IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags&IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags&IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                /* Insert the new slave right after the list head. */
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                /* First slave: a singleton circular list; the master
                 * inherits its MTU and link-type flags outright. */
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
        }
        return 0;
}
 222
 223
/* Build skb's link-layer header for transmission via dev, resolving
 * the destination through the neighbour cache.  A one-entry cache
 * (q->ncache) avoids repeated lookups for the same destination.
 * Returns 0 on success, a negative errno on hard failure, -EAGAIN
 * when resolution is pending and skb_res is NULL, or 1 when pending
 * after skb_res was handed to the neighbour layer. */
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
        struct neighbour *mn = skb_dst(skb)->neighbour;
        struct neighbour *n = q->ncache;

        if (mn->tbl == NULL)
                return -EINVAL;
        /* Cache hit: same table and same primary key as the route's
         * neighbour; just take another reference. */
        if (n && n->tbl == mn->tbl &&
            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
                atomic_inc(&n->refcnt);
        } else {
                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
        }
        if (neigh_event_send(n, skb_res) == 0) {
                int err;

                /* n->ha (hardware address) is protected by n->lock. */
                read_lock(&n->lock);
                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                      n->ha, NULL, skb->len);
                read_unlock(&n->lock);

                if (err < 0) {
                        neigh_release(n);
                        return -EINVAL;
                }
                /* Cache n (transferring our reference) and release the
                 * previously cached entry, if any. */
                teql_neigh_release(xchg(&q->ncache, n));
                return 0;
        }
        neigh_release(n);
        return (skb_res == NULL) ? -EAGAIN : 1;
}
 260
 261static inline int teql_resolve(struct sk_buff *skb,
 262                               struct sk_buff *skb_res, struct net_device *dev)
 263{
 264        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 265        if (txq->qdisc == &noop_qdisc)
 266                return -ENODEV;
 267
 268        if (dev->header_ops == NULL ||
 269            skb_dst(skb) == NULL ||
 270            skb_dst(skb)->neighbour == NULL)
 271                return 0;
 272        return __teql_resolve(skb, skb_res, dev);
 273}
 274
/* ndo_start_xmit for the master device: walk the slave list round-
 * robin, starting at master->slaves, and try to resolve + transmit
 * the skb directly on a slave, bypassing that slave's own queue.
 * Returns NETDEV_TX_OK when the skb was consumed (sent, handed to the
 * neighbour layer, or dropped) and NETDEV_TX_BUSY when every usable
 * slave was busy (the master queue is then stopped). */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        if ((q = start) == NULL)
                goto drop;

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
                const struct net_device_ops *slave_ops = slave->netdev_ops;

                /* Skip slaves whose teql qdisc was replaced meanwhile. */
                if (slave_txq->qdisc_sleeping != q)
                        continue;
                if (__netif_subqueue_stopped(slave, subq) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave)) {
                case 0:
                        /* Header resolved: try to grab the slave's tx lock
                         * and hand the skb straight to its driver. */
                        if (__netif_tx_trylock(slave_txq)) {
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_tx_queue_stopped(slave_txq) &&
                                    !netif_tx_queue_frozen(slave_txq) &&
                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
                                        txq_trans_update(slave_txq);
                                        __netif_tx_unlock(slave_txq);
                                        /* Advance the round-robin position. */
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        txq->tx_packets++;
                                        txq->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_queue_stopped(dev))
                                busy = 1;
                        break;
                case 1:
                        /* Second pass: the neighbour layer queued the skb
                         * while resolving; we are done with it. */
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        /* Resolution failed/pending on this slave; note it
                         * so a second pass can be made with skb_res set. */
                        nores = 1;
                        break;
                }
                /* Strip any link-layer header a failed attempt pushed. */
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                /* Retry once, letting neigh_event_send() consume the skb. */
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        dev->stats.tx_errors++;

drop:
        txq->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
 354
 355static int teql_master_open(struct net_device *dev)
 356{
 357        struct Qdisc * q;
 358        struct teql_master *m = netdev_priv(dev);
 359        int mtu = 0xFFFE;
 360        unsigned flags = IFF_NOARP|IFF_MULTICAST;
 361
 362        if (m->slaves == NULL)
 363                return -EUNATCH;
 364
 365        flags = FMASK;
 366
 367        q = m->slaves;
 368        do {
 369                struct net_device *slave = qdisc_dev(q);
 370
 371                if (slave == NULL)
 372                        return -EUNATCH;
 373
 374                if (slave->mtu < mtu)
 375                        mtu = slave->mtu;
 376                if (slave->hard_header_len > LL_MAX_HEADER)
 377                        return -EINVAL;
 378
 379                /* If all the slaves are BROADCAST, master is BROADCAST
 380                   If all the slaves are PtP, master is PtP
 381                   Otherwise, master is NBMA.
 382                 */
 383                if (!(slave->flags&IFF_POINTOPOINT))
 384                        flags &= ~IFF_POINTOPOINT;
 385                if (!(slave->flags&IFF_BROADCAST))
 386                        flags &= ~IFF_BROADCAST;
 387                if (!(slave->flags&IFF_MULTICAST))
 388                        flags &= ~IFF_MULTICAST;
 389        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 390
 391        m->dev->mtu = mtu;
 392        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 393        netif_start_queue(m->dev);
 394        return 0;
 395}
 396
/* ndo_stop for the master device: just stop its transmit queue. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}
 402
 403static int teql_master_mtu(struct net_device *dev, int new_mtu)
 404{
 405        struct teql_master *m = netdev_priv(dev);
 406        struct Qdisc *q;
 407
 408        if (new_mtu < 68)
 409                return -EINVAL;
 410
 411        q = m->slaves;
 412        if (q) {
 413                do {
 414                        if (new_mtu > qdisc_dev(q)->mtu)
 415                                return -EINVAL;
 416                } while ((q=NEXT_SLAVE(q)) != m->slaves);
 417        }
 418
 419        dev->mtu = new_mtu;
 420        return 0;
 421}
 422
/* net_device_ops for the teql master virtual device. */
static const struct net_device_ops teql_netdev_ops = {
        .ndo_open       = teql_master_open,
        .ndo_stop       = teql_master_close,
        .ndo_start_xmit = teql_master_xmit,
        .ndo_change_mtu = teql_master_mtu,
};
 429
/* alloc_netdev() setup callback: wire the per-master qdisc_ops to the
 * teql callbacks and give the virtual device sane defaults. */
static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev     = dev;
        ops->priv_size  = sizeof(struct teql_sched_data);

        ops->enqueue    =       teql_enqueue;
        ops->dequeue    =       teql_dequeue;
        ops->peek       =       teql_peek;
        ops->init       =       teql_qdisc_init;
        ops->reset      =       teql_reset;
        ops->destroy    =       teql_destroy;
        ops->owner      =       THIS_MODULE;

        dev->netdev_ops =       &teql_netdev_ops;
        dev->type               = ARPHRD_VOID;  /* no concrete hardware type */
        dev->mtu                = 1500;
        dev->tx_queue_len       = 100;
        dev->flags              = IFF_NOARP;
        dev->hard_header_len    = LL_MAX_HEADER; /* headroom for any slave's header */
        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE; /* xmit needs skb_dst() */
}
 454
/* All registered masters, for teardown in teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices (and matching qdisc types) created at load. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 459
 460static int __init teql_init(void)
 461{
 462        int i;
 463        int err = -ENODEV;
 464
 465        for (i = 0; i < max_equalizers; i++) {
 466                struct net_device *dev;
 467                struct teql_master *master;
 468
 469                dev = alloc_netdev(sizeof(struct teql_master),
 470                                  "teql%d", teql_master_setup);
 471                if (!dev) {
 472                        err = -ENOMEM;
 473                        break;
 474                }
 475
 476                if ((err = register_netdev(dev))) {
 477                        free_netdev(dev);
 478                        break;
 479                }
 480
 481                master = netdev_priv(dev);
 482
 483                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 484                err = register_qdisc(&master->qops);
 485
 486                if (err) {
 487                        unregister_netdev(dev);
 488                        free_netdev(dev);
 489                        break;
 490                }
 491
 492                list_add_tail(&master->master_list, &master_dev_list);
 493        }
 494        return i ? 0 : err;
 495}
 496
 497static void __exit teql_exit(void)
 498{
 499        struct teql_master *master, *nxt;
 500
 501        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 502
 503                list_del(&master->master_list);
 504
 505                unregister_qdisc(&master->qops);
 506                unregister_netdev(master->dev);
 507                free_netdev(master->dev);
 508        }
 509}
 510
/* Standard module entry/exit hooks and license declaration. */
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
 515