linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/errno.h>
  16#include <linux/if_arp.h>
  17#include <linux/netdevice.h>
  18#include <linux/init.h>
  19#include <linux/skbuff.h>
  20#include <linux/moduleparam.h>
  21#include <net/dst.h>
  22#include <net/neighbour.h>
  23#include <net/pkt_sched.h>
  24
  25/*
  26   How to set it up.
  27   -----------------
  28
  29   After loading this module you will find a new device teqlN
  30   and new qdisc with the same name. To join a slave to the equalizer
  31   you should just set this qdisc on a device f.e.
  32
  33   # tc qdisc add dev eth0 root teql0
  34   # tc qdisc add dev eth1 root teql0
  35
  36   That's all. Full PnP 8)
  37
  38   Applicability.
  39   --------------
  40
  41   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  42      signal and generate EOI events. If you want to equalize virtual devices
  43      like tunnels, use a normal eql device.
  44   2. This device puts no limitations on physical slave characteristics
  45      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  46      Certainly, large difference in link speeds will make the resulting
  47      equalized link unusable, because of huge packet reordering.
  48      I estimate an upper useful difference as ~10 times.
  49   3. If the slave requires address resolution, only protocols using
  50      neighbour cache (IPv4/IPv6) will work over the equalized link.
  51      Other protocols are still allowed to use the slave device directly,
  52      which will not break load balancing, though native slave
  53      traffic will have the highest priority.  */
  54
  55struct teql_master
  56{
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        struct net_device_stats stats;
  62};
  63
  64struct teql_sched_data
  65{
  66        struct Qdisc *next;
  67        struct teql_master *m;
  68        struct neighbour *ncache;
  69        struct sk_buff_head q;
  70};
  71
  /* Successor of qdisc q in the circular slave list; usable as an lvalue. */
  #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

  /* Link-type flags that the master device takes over from its slaves. */
  #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  75
  76/* "teql*" qdisc routines */
  77
  78static int
  79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  80{
  81        struct net_device *dev = qdisc_dev(sch);
  82        struct teql_sched_data *q = qdisc_priv(sch);
  83
  84        if (q->q.qlen < dev->tx_queue_len) {
  85                __skb_queue_tail(&q->q, skb);
  86                sch->bstats.bytes += qdisc_pkt_len(skb);
  87                sch->bstats.packets++;
  88                return 0;
  89        }
  90
  91        kfree_skb(skb);
  92        sch->qstats.drops++;
  93        return NET_XMIT_DROP;
  94}
  95
  96static struct sk_buff *
  97teql_dequeue(struct Qdisc* sch)
  98{
  99        struct teql_sched_data *dat = qdisc_priv(sch);
 100        struct netdev_queue *dat_queue;
 101        struct sk_buff *skb;
 102
 103        skb = __skb_dequeue(&dat->q);
 104        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        }
 112        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 113        return skb;
 114}
 115
 116static struct sk_buff *
 117teql_peek(struct Qdisc* sch)
 118{
 119        /* teql is meant to be used as root qdisc */
 120        return NULL;
 121}
 122
 123static __inline__ void
 124teql_neigh_release(struct neighbour *n)
 125{
 126        if (n)
 127                neigh_release(n);
 128}
 129
 130static void
 131teql_reset(struct Qdisc* sch)
 132{
 133        struct teql_sched_data *dat = qdisc_priv(sch);
 134
 135        skb_queue_purge(&dat->q);
 136        sch->q.qlen = 0;
 137        teql_neigh_release(xchg(&dat->ncache, NULL));
 138}
 139
 140static void
 141teql_destroy(struct Qdisc* sch)
 142{
 143        struct Qdisc *q, *prev;
 144        struct teql_sched_data *dat = qdisc_priv(sch);
 145        struct teql_master *master = dat->m;
 146
 147        if ((prev = master->slaves) != NULL) {
 148                do {
 149                        q = NEXT_SLAVE(prev);
 150                        if (q == sch) {
 151                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 152                                if (q == master->slaves) {
 153                                        master->slaves = NEXT_SLAVE(q);
 154                                        if (q == master->slaves) {
 155                                                struct netdev_queue *txq;
 156                                                spinlock_t *root_lock;
 157
 158                                                txq = netdev_get_tx_queue(master->dev, 0);
 159                                                master->slaves = NULL;
 160
 161                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 162                                                spin_lock_bh(root_lock);
 163                                                qdisc_reset(txq->qdisc);
 164                                                spin_unlock_bh(root_lock);
 165                                        }
 166                                }
 167                                skb_queue_purge(&dat->q);
 168                                teql_neigh_release(xchg(&dat->ncache, NULL));
 169                                break;
 170                        }
 171
 172                } while ((prev = q) != master->slaves);
 173        }
 174}
 175
 176static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 177{
 178        struct net_device *dev = qdisc_dev(sch);
 179        struct teql_master *m = (struct teql_master*)sch->ops;
 180        struct teql_sched_data *q = qdisc_priv(sch);
 181
 182        if (dev->hard_header_len > m->dev->hard_header_len)
 183                return -EINVAL;
 184
 185        if (m->dev == dev)
 186                return -ELOOP;
 187
 188        q->m = m;
 189
 190        skb_queue_head_init(&q->q);
 191
 192        if (m->slaves) {
 193                if (m->dev->flags & IFF_UP) {
 194                        if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
 195                            || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
 196                            || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
 197                            || dev->mtu < m->dev->mtu)
 198                                return -EINVAL;
 199                } else {
 200                        if (!(dev->flags&IFF_POINTOPOINT))
 201                                m->dev->flags &= ~IFF_POINTOPOINT;
 202                        if (!(dev->flags&IFF_BROADCAST))
 203                                m->dev->flags &= ~IFF_BROADCAST;
 204                        if (!(dev->flags&IFF_MULTICAST))
 205                                m->dev->flags &= ~IFF_MULTICAST;
 206                        if (dev->mtu < m->dev->mtu)
 207                                m->dev->mtu = dev->mtu;
 208                }
 209                q->next = NEXT_SLAVE(m->slaves);
 210                NEXT_SLAVE(m->slaves) = sch;
 211        } else {
 212                q->next = sch;
 213                m->slaves = sch;
 214                m->dev->mtu = dev->mtu;
 215                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 216        }
 217        return 0;
 218}
 219
 220
 221static int
 222__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
 223{
 224        struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
 225        struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
 226        struct neighbour *mn = skb->dst->neighbour;
 227        struct neighbour *n = q->ncache;
 228
 229        if (mn->tbl == NULL)
 230                return -EINVAL;
 231        if (n && n->tbl == mn->tbl &&
 232            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
 233                atomic_inc(&n->refcnt);
 234        } else {
 235                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
 236                if (IS_ERR(n))
 237                        return PTR_ERR(n);
 238        }
 239        if (neigh_event_send(n, skb_res) == 0) {
 240                int err;
 241
 242                read_lock(&n->lock);
 243                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
 244                                      n->ha, NULL, skb->len);
 245                read_unlock(&n->lock);
 246
 247                if (err < 0) {
 248                        neigh_release(n);
 249                        return -EINVAL;
 250                }
 251                teql_neigh_release(xchg(&q->ncache, n));
 252                return 0;
 253        }
 254        neigh_release(n);
 255        return (skb_res == NULL) ? -EAGAIN : 1;
 256}
 257
 258static inline int teql_resolve(struct sk_buff *skb,
 259                               struct sk_buff *skb_res, struct net_device *dev)
 260{
 261        struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 262        if (txq->qdisc == &noop_qdisc)
 263                return -ENODEV;
 264
 265        if (dev->header_ops == NULL ||
 266            skb->dst == NULL ||
 267            skb->dst->neighbour == NULL)
 268                return 0;
 269        return __teql_resolve(skb, skb_res, dev);
 270}
 271
 272static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 273{
 274        struct teql_master *master = netdev_priv(dev);
 275        struct Qdisc *start, *q;
 276        int busy;
 277        int nores;
 278        int subq = skb_get_queue_mapping(skb);
 279        struct sk_buff *skb_res = NULL;
 280
 281        start = master->slaves;
 282
 283restart:
 284        nores = 0;
 285        busy = 0;
 286
 287        if ((q = start) == NULL)
 288                goto drop;
 289
 290        do {
 291                struct net_device *slave = qdisc_dev(q);
 292                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 293                const struct net_device_ops *slave_ops = slave->netdev_ops;
 294
 295                if (slave_txq->qdisc_sleeping != q)
 296                        continue;
 297                if (__netif_subqueue_stopped(slave, subq) ||
 298                    !netif_running(slave)) {
 299                        busy = 1;
 300                        continue;
 301                }
 302
 303                switch (teql_resolve(skb, skb_res, slave)) {
 304                case 0:
 305                        if (__netif_tx_trylock(slave_txq)) {
 306                                unsigned int length = qdisc_pkt_len(skb);
 307
 308                                if (!netif_tx_queue_stopped(slave_txq) &&
 309                                    !netif_tx_queue_frozen(slave_txq) &&
 310                                    slave_ops->ndo_start_xmit(skb, slave) == 0) {
 311                                        __netif_tx_unlock(slave_txq);
 312                                        master->slaves = NEXT_SLAVE(q);
 313                                        netif_wake_queue(dev);
 314                                        master->stats.tx_packets++;
 315                                        master->stats.tx_bytes += length;
 316                                        return 0;
 317                                }
 318                                __netif_tx_unlock(slave_txq);
 319                        }
 320                        if (netif_queue_stopped(dev))
 321                                busy = 1;
 322                        break;
 323                case 1:
 324                        master->slaves = NEXT_SLAVE(q);
 325                        return 0;
 326                default:
 327                        nores = 1;
 328                        break;
 329                }
 330                __skb_pull(skb, skb_network_offset(skb));
 331        } while ((q = NEXT_SLAVE(q)) != start);
 332
 333        if (nores && skb_res == NULL) {
 334                skb_res = skb;
 335                goto restart;
 336        }
 337
 338        if (busy) {
 339                netif_stop_queue(dev);
 340                return 1;
 341        }
 342        master->stats.tx_errors++;
 343
 344drop:
 345        master->stats.tx_dropped++;
 346        dev_kfree_skb(skb);
 347        return 0;
 348}
 349
 350static int teql_master_open(struct net_device *dev)
 351{
 352        struct Qdisc * q;
 353        struct teql_master *m = netdev_priv(dev);
 354        int mtu = 0xFFFE;
 355        unsigned flags = IFF_NOARP|IFF_MULTICAST;
 356
 357        if (m->slaves == NULL)
 358                return -EUNATCH;
 359
 360        flags = FMASK;
 361
 362        q = m->slaves;
 363        do {
 364                struct net_device *slave = qdisc_dev(q);
 365
 366                if (slave == NULL)
 367                        return -EUNATCH;
 368
 369                if (slave->mtu < mtu)
 370                        mtu = slave->mtu;
 371                if (slave->hard_header_len > LL_MAX_HEADER)
 372                        return -EINVAL;
 373
 374                /* If all the slaves are BROADCAST, master is BROADCAST
 375                   If all the slaves are PtP, master is PtP
 376                   Otherwise, master is NBMA.
 377                 */
 378                if (!(slave->flags&IFF_POINTOPOINT))
 379                        flags &= ~IFF_POINTOPOINT;
 380                if (!(slave->flags&IFF_BROADCAST))
 381                        flags &= ~IFF_BROADCAST;
 382                if (!(slave->flags&IFF_MULTICAST))
 383                        flags &= ~IFF_MULTICAST;
 384        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 385
 386        m->dev->mtu = mtu;
 387        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 388        netif_start_queue(m->dev);
 389        return 0;
 390}
 391
 /* Take the master device down: stop its transmit queue. Always returns 0. */
 static int
 teql_master_close(struct net_device *dev)
 {
         netif_stop_queue(dev);

         return 0;
 }
 397
 398static struct net_device_stats *teql_master_stats(struct net_device *dev)
 399{
 400        struct teql_master *m = netdev_priv(dev);
 401        return &m->stats;
 402}
 403
 404static int teql_master_mtu(struct net_device *dev, int new_mtu)
 405{
 406        struct teql_master *m = netdev_priv(dev);
 407        struct Qdisc *q;
 408
 409        if (new_mtu < 68)
 410                return -EINVAL;
 411
 412        q = m->slaves;
 413        if (q) {
 414                do {
 415                        if (new_mtu > qdisc_dev(q)->mtu)
 416                                return -EINVAL;
 417                } while ((q=NEXT_SLAVE(q)) != m->slaves);
 418        }
 419
 420        dev->mtu = new_mtu;
 421        return 0;
 422}
 423
 424static const struct net_device_ops teql_netdev_ops = {
 425        .ndo_open       = teql_master_open,
 426        .ndo_stop       = teql_master_close,
 427        .ndo_start_xmit = teql_master_xmit,
 428        .ndo_get_stats  = teql_master_stats,
 429        .ndo_change_mtu = teql_master_mtu,
 430};
 431
 432static __init void teql_master_setup(struct net_device *dev)
 433{
 434        struct teql_master *master = netdev_priv(dev);
 435        struct Qdisc_ops *ops = &master->qops;
 436
 437        master->dev     = dev;
 438        ops->priv_size  = sizeof(struct teql_sched_data);
 439
 440        ops->enqueue    =       teql_enqueue;
 441        ops->dequeue    =       teql_dequeue;
 442        ops->peek       =       teql_peek;
 443        ops->init       =       teql_qdisc_init;
 444        ops->reset      =       teql_reset;
 445        ops->destroy    =       teql_destroy;
 446        ops->owner      =       THIS_MODULE;
 447
 448        dev->netdev_ops =       &teql_netdev_ops;
 449        dev->type               = ARPHRD_VOID;
 450        dev->mtu                = 1500;
 451        dev->tx_queue_len       = 100;
 452        dev->flags              = IFF_NOARP;
 453        dev->hard_header_len    = LL_MAX_HEADER;
 454}
 455
 456static LIST_HEAD(master_dev_list);
 457static int max_equalizers = 1;
 458module_param(max_equalizers, int, 0);
 459MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 460
 461static int __init teql_init(void)
 462{
 463        int i;
 464        int err = -ENODEV;
 465
 466        for (i = 0; i < max_equalizers; i++) {
 467                struct net_device *dev;
 468                struct teql_master *master;
 469
 470                dev = alloc_netdev(sizeof(struct teql_master),
 471                                  "teql%d", teql_master_setup);
 472                if (!dev) {
 473                        err = -ENOMEM;
 474                        break;
 475                }
 476
 477                if ((err = register_netdev(dev))) {
 478                        free_netdev(dev);
 479                        break;
 480                }
 481
 482                master = netdev_priv(dev);
 483
 484                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 485                err = register_qdisc(&master->qops);
 486
 487                if (err) {
 488                        unregister_netdev(dev);
 489                        free_netdev(dev);
 490                        break;
 491                }
 492
 493                list_add_tail(&master->master_list, &master_dev_list);
 494        }
 495        return i ? 0 : err;
 496}
 497
 498static void __exit teql_exit(void)
 499{
 500        struct teql_master *master, *nxt;
 501
 502        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 503
 504                list_del(&master->master_list);
 505
 506                unregister_qdisc(&master->qops);
 507                unregister_netdev(master->dev);
 508                free_netdev(master->dev);
 509        }
 510}
 511
 512module_init(teql_init);
 513module_exit(teql_exit);
 514
 515MODULE_LICENSE("GPL");
 516