linux/net/sched/sch_teql.c
<<
>>
Prefs
/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/errno.h>
  16#include <linux/if_arp.h>
  17#include <linux/netdevice.h>
  18#include <linux/init.h>
  19#include <linux/skbuff.h>
  20#include <linux/moduleparam.h>
  21#include <net/dst.h>
  22#include <net/neighbour.h>
  23#include <net/pkt_sched.h>
  24
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
  54
  55struct teql_master
  56{
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        struct net_device_stats stats;
  62};
  63
  64struct teql_sched_data
  65{
  66        struct Qdisc *next;
  67        struct teql_master *m;
  68        struct neighbour *ncache;
  69        struct sk_buff_head q;
  70};
  71
  /*
 * Successor of qdisc q in the circular slave list.  Expands to an lvalue,
 * so it is also used to relink the list.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device mirrors from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  75
  76/* "teql*" qdisc routines */
  77
  78static int
  79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  80{
  81        struct net_device *dev = qdisc_dev(sch);
  82        struct teql_sched_data *q = qdisc_priv(sch);
  83
  84        if (q->q.qlen < dev->tx_queue_len) {
  85                __skb_queue_tail(&q->q, skb);
  86                sch->bstats.bytes += qdisc_pkt_len(skb);
  87                sch->bstats.packets++;
  88                return 0;
  89        }
  90
  91        kfree_skb(skb);
  92        sch->qstats.drops++;
  93        return NET_XMIT_DROP;
  94}
  95
  96static struct sk_buff *
  97teql_dequeue(struct Qdisc* sch)
  98{
  99        struct teql_sched_data *dat = qdisc_priv(sch);
 100        struct netdev_queue *dat_queue;
 101        struct sk_buff *skb;
 102
 103        skb = __skb_dequeue(&dat->q);
 104        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        }
 112        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 113        return skb;
 114}
 115
 116static struct sk_buff *
 117teql_peek(struct Qdisc* sch)
 118{
 119        /* teql is meant to be used as root qdisc */
 120        return NULL;
 121}
 122
 /* NULL-tolerant wrapper around neigh_release(). */
static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (!n)
		return;

	neigh_release(n);
}
 129
 130static void
 131teql_reset(struct Qdisc* sch)
 132{
 133        struct teql_sched_data *dat = qdisc_priv(sch);
 134
 135        skb_queue_purge(&dat->q);
 136        sch->q.qlen = 0;
 137        teql_neigh_release(xchg(&dat->ncache, NULL));
 138}
 139
 140static void
 141teql_destroy(struct Qdisc* sch)
 142{
 143        struct Qdisc *q, *prev;
 144        struct teql_sched_data *dat = qdisc_priv(sch);
 145        struct teql_master *master = dat->m;
 146
 147        if ((prev = master->slaves) != NULL) {
 148                do {
 149                        q = NEXT_SLAVE(prev);
 150                        if (q == sch) {
 151                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 152                                if (q == master->slaves) {
 153                                        master->slaves = NEXT_SLAVE(q);
 154                                        if (q == master->slaves) {
 155                                                struct netdev_queue *txq;
 156                                                spinlock_t *root_lock;
 157
 158                                                txq = netdev_get_tx_queue(master->dev, 0);
 159                                                master->slaves = NULL;
 160
 161                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 162                                                spin_lock_bh(root_lock);
 163                                                qdisc_reset(txq->qdisc);
 164                                                spin_unlock_bh(root_lock);
 165                                        }
 166                                }
 167                                skb_queue_purge(&dat->q);
 168                                teql_neigh_release(xchg(&dat->ncache, NULL));
 169                                break;
 170                        }
 171
 172                } while ((prev = q) != master->slaves);
 173        }
 174}
 175
 176static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 177{
 178        struct net_device *dev = qdisc_dev(sch);
 179        struct teql_master *m = (struct teql_master*)sch->ops;
 180        struct teql_sched_data *q = qdisc_priv(sch);
 181
 182        if (dev->hard_header_len > m->dev->hard_header_len)
 183                return -EINVAL;
 184
 185        if (m->dev == dev)
 186                return -ELOOP;
 187
 188        q->m = m;
 189
 190        skb_queue_head_init(&q->q);
 191
 192        if (m->slaves) {
 193                if (m->dev->flags & IFF_UP) {
 194                        if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
 195                            || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
 196                            || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
 197                            || dev->mtu < m->dev->mtu)
 198                                return -EINVAL;
 199                } else {
 200                        if (!(dev->flags&IFF_POINTOPOINT))
 201                                m->dev->flags &= ~IFF_POINTOPOINT;
 202                        if (!(dev->flags&IFF_BROADCAST))
 203                                m->dev->flags &= ~IFF_BROADCAST;
 204                        if (!(dev->flags&IFF_MULTICAST))
 205                                m->dev->flags &= ~IFF_MULTICAST;
 206                        if (dev->mtu < m->dev->mtu)
 207                                m->dev->mtu = dev->mtu;
 208                }
 209                q->next = NEXT_SLAVE(m->slaves);
 210                NEXT_SLAVE(m->slaves) = sch;
 211        } else {
 212                q->next = sch;
 213                m->slaves = sch;
 214                m->dev->mtu = dev->mtu;
 215                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 216        }
 217        return 0;
 218}
 219
 220
 221static int
 222__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 223{
 224        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 225        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
 226        struct neighbour *mn = skb->dst->neighbour;
 227        struct neighbour *n = q->ncache;
 228
 229        if (mn->tbl == NULL)
 230                return -EINVAL;
 231        if (n && n->tbl == mn->tbl &&
 232            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
 233                atomic_inc(&n->refcnt);
 234        } else {
 235                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
 236                if (IS_ERR(n))
 237                        return PTR_ERR(n);
 238        }
 239        if (neigh_event_send(n, skb_res) == 0) {
 240                int err;
 241
 242                read_lock(&n->lock);
 243                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 244                                      n->ha, NULL, skb->len);
 245                read_unlock(&n->lock);
 246
 247                if (err < 0) {
 248                        neigh_release(n);
 249                        return -EINVAL;
 250                }
 251                teql_neigh_release(xchg(&q->ncache, n));
 252                return 0;
 253        }
 254        neigh_release(n);
 255        return (skb_res == NULL) ? -EAGAIN : 1;
 256}
 257
 258static inline int teql_resolve(struct sk_buff *skb,
 259                               struct sk_buff *skb_res, struct net_device *dev)
 260{
 261        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 262        if (txq->qdisc == &noop_qdisc)
 263                return -ENODEV;
 264
 265        if (dev->header_ops == NULL ||
 266            skb->dst == NULL ||
 267            skb->dst->neighbour == NULL)
 268                return 0;
 269        return __teql_resolve(skb, skb_res, dev);
 270}
 271
 272static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 273{
 274        struct teql_master *master = netdev_priv(dev);
 275        struct Qdisc *start, *q;
 276        int busy;
 277        int nores;
 278        int subq = skb_get_queue_mapping(skb);
 279        struct sk_buff *skb_res = NULL;
 280
 281        start = master->slaves;
 282
 283restart:
 284        nores = 0;
 285        busy = 0;
 286
 287        if ((q = start) == NULL)
 288                goto drop;
 289
 290        do {
 291                struct net_device *slave = qdisc_dev(q);
 292                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 293                const struct net_device_ops *slave_ops = slave->netdev_ops;
 294
 295                if (slave_txq->qdisc_sleeping != q)
 296                        continue;
 297                if (__netif_subqueue_stopped(slave, subq) ||
 298                    !netif_running(slave)) {
 299                        busy = 1;
 300                        continue;
 301                }
 302
 303                switch (teql_resolve(skb, skb_res, slave)) {
 304                case 0:
 305                        if (__netif_tx_trylock(slave_txq)) {
 306                                if (!netif_tx_queue_stopped(slave_txq) &&
 307                                    !netif_tx_queue_frozen(slave_txq) &&
 308                                    slave_ops->ndo_start_xmit(skb, slave) == 0) {
 309                                        __netif_tx_unlock(slave_txq);
 310                                        master->slaves = NEXT_SLAVE(q);
 311                                        netif_wake_queue(dev);
 312                                        master->stats.tx_packets++;
 313                                        master->stats.tx_bytes +=
 314                                                qdisc_pkt_len(skb);
 315                                        return 0;
 316                                }
 317                                __netif_tx_unlock(slave_txq);
 318                        }
 319                        if (netif_queue_stopped(dev))
 320                                busy = 1;
 321                        break;
 322                case 1:
 323                        master->slaves = NEXT_SLAVE(q);
 324                        return 0;
 325                default:
 326                        nores = 1;
 327                        break;
 328                }
 329                __skb_pull(skb, skb_network_offset(skb));
 330        } while ((q = NEXT_SLAVE(q)) != start);
 331
 332        if (nores && skb_res == NULL) {
 333                skb_res = skb;
 334                goto restart;
 335        }
 336
 337        if (busy) {
 338                netif_stop_queue(dev);
 339                return 1;
 340        }
 341        master->stats.tx_errors++;
 342
 343drop:
 344        master->stats.tx_dropped++;
 345        dev_kfree_skb(skb);
 346        return 0;
 347}
 348
 349static int teql_master_open(struct net_device *dev)
 350{
 351        struct Qdisc * q;
 352        struct teql_master *m = netdev_priv(dev);
 353        int mtu = 0xFFFE;
 354        unsigned flags = IFF_NOARP|IFF_MULTICAST;
 355
 356        if (m->slaves == NULL)
 357                return -EUNATCH;
 358
 359        flags = FMASK;
 360
 361        q = m->slaves;
 362        do {
 363                struct net_device *slave = qdisc_dev(q);
 364
 365                if (slave == NULL)
 366                        return -EUNATCH;
 367
 368                if (slave->mtu < mtu)
 369                        mtu = slave->mtu;
 370                if (slave->hard_header_len > LL_MAX_HEADER)
 371                        return -EINVAL;
 372
 373                /* If all the slaves are BROADCAST, master is BROADCAST
 374                   If all the slaves are PtP, master is PtP
 375                   Otherwise, master is NBMA.
 376                 */
 377                if (!(slave->flags&IFF_POINTOPOINT))
 378                        flags &= ~IFF_POINTOPOINT;
 379                if (!(slave->flags&IFF_BROADCAST))
 380                        flags &= ~IFF_BROADCAST;
 381                if (!(slave->flags&IFF_MULTICAST))
 382                        flags &= ~IFF_MULTICAST;
 383        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 384
 385        m->dev->mtu = mtu;
 386        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 387        netif_start_queue(m->dev);
 388        return 0;
 389}
 390
 /* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
 396
 397static struct net_device_stats *teql_master_stats(struct net_device *dev)
 398{
 399        struct teql_master *m = netdev_priv(dev);
 400        return &m->stats;
 401}
 402
 403static int teql_master_mtu(struct net_device *dev, int new_mtu)
 404{
 405        struct teql_master *m = netdev_priv(dev);
 406        struct Qdisc *q;
 407
 408        if (new_mtu < 68)
 409                return -EINVAL;
 410
 411        q = m->slaves;
 412        if (q) {
 413                do {
 414                        if (new_mtu > qdisc_dev(q)->mtu)
 415                                return -EINVAL;
 416                } while ((q=NEXT_SLAVE(q)) != m->slaves);
 417        }
 418
 419        dev->mtu = new_mtu;
 420        return 0;
 421}
 422
 423static const struct net_device_ops teql_netdev_ops = {
 424        .ndo_open       = teql_master_open,
 425        .ndo_stop       = teql_master_close,
 426        .ndo_start_xmit = teql_master_xmit,
 427        .ndo_get_stats  = teql_master_stats,
 428        .ndo_change_mtu = teql_master_mtu,
 429};
 430
 431static __init void teql_master_setup(struct net_device *dev)
 432{
 433        struct teql_master *master = netdev_priv(dev);
 434        struct Qdisc_ops *ops = &master->qops;
 435
 436        master->dev     = dev;
 437        ops->priv_size  = sizeof(struct teql_sched_data);
 438
 439        ops->enqueue    =       teql_enqueue;
 440        ops->dequeue    =       teql_dequeue;
 441        ops->peek       =       teql_peek;
 442        ops->init       =       teql_qdisc_init;
 443        ops->reset      =       teql_reset;
 444        ops->destroy    =       teql_destroy;
 445        ops->owner      =       THIS_MODULE;
 446
 447        dev->netdev_ops =       &teql_netdev_ops;
 448        dev->type               = ARPHRD_VOID;
 449        dev->mtu                = 1500;
 450        dev->tx_queue_len       = 100;
 451        dev->flags              = IFF_NOARP;
 452        dev->hard_header_len    = LL_MAX_HEADER;
 453}
 454
 455static LIST_HEAD(master_dev_list);
 456static int max_equalizers = 1;
 457module_param(max_equalizers, int, 0);
 458MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 459
 460static int __init teql_init(void)
 461{
 462        int i;
 463        int err = -ENODEV;
 464
 465        for (i = 0; i < max_equalizers; i++) {
 466                struct net_device *dev;
 467                struct teql_master *master;
 468
 469                dev = alloc_netdev(sizeof(struct teql_master),
 470                                  "teql%d", teql_master_setup);
 471                if (!dev) {
 472                        err = -ENOMEM;
 473                        break;
 474                }
 475
 476                if ((err = register_netdev(dev))) {
 477                        free_netdev(dev);
 478                        break;
 479                }
 480
 481                master = netdev_priv(dev);
 482
 483                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 484                err = register_qdisc(&master->qops);
 485
 486                if (err) {
 487                        unregister_netdev(dev);
 488                        free_netdev(dev);
 489                        break;
 490                }
 491
 492                list_add_tail(&master->master_list, &master_dev_list);
 493        }
 494        return i ? 0 : err;
 495}
 496
 497static void __exit teql_exit(void)
 498{
 499        struct teql_master *master, *nxt;
 500
 501        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 502
 503                list_del(&master->master_list);
 504
 505                unregister_qdisc(&master->qops);
 506                unregister_netdev(master->dev);
 507                free_netdev(master->dev);
 508        }
 509}
 510
 511module_init(teql_init);
 512module_exit(teql_exit);
 513
 514MODULE_LICENSE("GPL");
 515