linux/net/sched/sch_teql.c
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device, teqlN,
   and a new qdisc with the same name. To join a slave to the
   equalizer, just attach this qdisc as the root qdisc of the
   slave device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the
      tbusy signal and generate EOI events. If you want to equalize
      virtual devices such as tunnels, use a normal eql device instead.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will happily equalize a 9600 baud line and 100Mb ethernet :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of massive packet reordering.
      I estimate the upper useful speed ratio at about 10:1.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
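
/* A typical follow-up, assuming the standard iproute2 tools (the address
   below is purely illustrative), is to configure the master device like
   any other interface:

   # ip addr add 192.168.100.1/24 dev teql0
   # ip link set dev teql0 up
 */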

struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)

/* "teql*" qdisc routines */

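/* Tail-drop FIFO: queue the skb on the slave's private queue as long as
 * the queue is shorter than the slave device's tx_queue_len, otherwise
 * drop it.
 */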
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return 0;
	}

	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

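/* Put a packet that could not be transmitted back at the head of the
 * queue, to be retried first on the next dequeue.
 */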
static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

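/* Hand the next queued skb to the master. When this slave's queue runs
 * empty, record it as the next round-robin candidate and wake the master
 * device's queue; sch->q.qlen is kept in sync, also counting packets
 * still held by the master's own qdisc.
 */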
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	if (skb == NULL) {
		struct net_device *m = qdisc_dev(dat_queue->qdisc);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
	return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

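/* Unlink this qdisc from its master's circular slave list. If it was the
 * last slave, also reset the master's own qdisc under the root lock.
 */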
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(txq->qdisc);
						spin_lock_bh(root_lock);
						qdisc_reset(txq->qdisc);
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}
		} while ((prev = q) != master->slaves);
	}
}

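/* Attach a new slave device to the equalizer. While the master is down,
 * its flags and MTU are narrowed to match the new slave; once it is up,
 * a slave that would narrow them further is rejected instead. A slave
 * needing more headroom than the master, or the master itself, is
 * refused outright.
 */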
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT && !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST && !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST && !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

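/* Resolve the link-layer address for skb on the given slave and build the
 * hard header. The most recently used neighbour entry is cached per slave
 * (q->ncache) and reused when the destination key matches. Returns 0 on
 * success, a negative errno on failure, and 1 (or -EAGAIN when skb_res is
 * NULL) while resolution is still pending.
 */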
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
	struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res, struct net_device *dev)
{
	struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);

	if (txq->qdisc == &noop_qdisc)
		return -ENODEV;

	if (dev->header_ops == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

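/* Round-robin over the slave list, starting at master->slaves. A slave
 * whose queue is stopped marks the equalizer busy; one whose neighbour is
 * unresolved is retried on a second pass with skb_res set, so that
 * resolution is actually kicked off. On success the round-robin pointer
 * advances past the slave that transmitted.
 */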
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq;

		slave_txq = netdev_get_tx_queue(slave, 0);
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (__netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				if (!netif_tx_queue_stopped(slave_txq) &&
				    !netif_tx_queue_frozen(slave_txq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes +=
						qdisc_pkt_len(skb);
					return 0;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

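/* Bringing the master up requires at least one slave. The master's MTU
 * becomes the minimum over all slaves; it is BROADCAST only if every
 * slave is, point-to-point only if every slave is, and NBMA otherwise.
 */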
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

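/* The master's MTU may be raised only up to the smallest slave MTU, and
 * never below the IPv4 minimum of 68.
 */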
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

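/* Wire up both halves of the equalizer: the Qdisc_ops that slaves attach
 * to, and the net_device callbacks of the master itself. The qdisc ops'
 * id is filled in with the device name (teql0, teql1, ...) at
 * registration time in teql_init().
 */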
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

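/* Create max_equalizers master devices. Each one registers both a
 * net_device and a qdisc whose id matches the device name, so "teql0"
 * names the device and the qdisc that feeds it. Init succeeds if at
 * least one equalizer was set up.
 */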
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");