linux/net/sched/sch_teql.c
<<
>>
Prefs
   1/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   2 *
   3 *              This program is free software; you can redistribute it and/or
   4 *              modify it under the terms of the GNU General Public License
   5 *              as published by the Free Software Foundation; either version
   6 *              2 of the License, or (at your option) any later version.
   7 *
   8 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/types.h>
  13#include <linux/kernel.h>
  14#include <linux/slab.h>
  15#include <linux/string.h>
  16#include <linux/errno.h>
  17#include <linux/if_arp.h>
  18#include <linux/netdevice.h>
  19#include <linux/init.h>
  20#include <linux/skbuff.h>
  21#include <linux/moduleparam.h>
  22#include <net/dst.h>
  23#include <net/neighbour.h>
  24#include <net/pkt_sched.h>
  25
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
  55
  56struct teql_master {
  57        struct Qdisc_ops qops;
  58        struct net_device *dev;
  59        struct Qdisc *slaves;
  60        struct list_head master_list;
  61        unsigned long   tx_bytes;
  62        unsigned long   tx_packets;
  63        unsigned long   tx_errors;
  64        unsigned long   tx_dropped;
  65};
  66
  67struct teql_sched_data {
  68        struct Qdisc *next;
  69        struct teql_master *m;
  70        struct neighbour *ncache;
  71        struct sk_buff_head q;
  72};
  73
  74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
  75
  76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  77
  78/* "teql*" qdisc routines */
  79
  80static int
  81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  82{
  83        struct net_device *dev = qdisc_dev(sch);
  84        struct teql_sched_data *q = qdisc_priv(sch);
  85
  86        if (q->q.qlen < dev->tx_queue_len) {
  87                __skb_queue_tail(&q->q, skb);
  88                return NET_XMIT_SUCCESS;
  89        }
  90
  91        kfree_skb(skb);
  92        sch->qstats.drops++;
  93        return NET_XMIT_DROP;
  94}
  95
  96static struct sk_buff *
  97teql_dequeue(struct Qdisc *sch)
  98{
  99        struct teql_sched_data *dat = qdisc_priv(sch);
 100        struct netdev_queue *dat_queue;
 101        struct sk_buff *skb;
 102
 103        skb = __skb_dequeue(&dat->q);
 104        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 105        if (skb == NULL) {
 106                struct net_device *m = qdisc_dev(dat_queue->qdisc);
 107                if (m) {
 108                        dat->m->slaves = sch;
 109                        netif_wake_queue(m);
 110                }
 111        } else {
 112                qdisc_bstats_update(sch, skb);
 113        }
 114        sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
 115        return skb;
 116}
 117
 118static struct sk_buff *
 119teql_peek(struct Qdisc *sch)
 120{
 121        /* teql is meant to be used as root qdisc */
 122        return NULL;
 123}
 124
 125static inline void
 126teql_neigh_release(struct neighbour *n)
 127{
 128        if (n)
 129                neigh_release(n);
 130}
 131
 132static void
 133teql_reset(struct Qdisc *sch)
 134{
 135        struct teql_sched_data *dat = qdisc_priv(sch);
 136
 137        skb_queue_purge(&dat->q);
 138        sch->q.qlen = 0;
 139        teql_neigh_release(xchg(&dat->ncache, NULL));
 140}
 141
 142static void
 143teql_destroy(struct Qdisc *sch)
 144{
 145        struct Qdisc *q, *prev;
 146        struct teql_sched_data *dat = qdisc_priv(sch);
 147        struct teql_master *master = dat->m;
 148
 149        prev = master->slaves;
 150        if (prev) {
 151                do {
 152                        q = NEXT_SLAVE(prev);
 153                        if (q == sch) {
 154                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
 155                                if (q == master->slaves) {
 156                                        master->slaves = NEXT_SLAVE(q);
 157                                        if (q == master->slaves) {
 158                                                struct netdev_queue *txq;
 159                                                spinlock_t *root_lock;
 160
 161                                                txq = netdev_get_tx_queue(master->dev, 0);
 162                                                master->slaves = NULL;
 163
 164                                                root_lock = qdisc_root_sleeping_lock(txq->qdisc);
 165                                                spin_lock_bh(root_lock);
 166                                                qdisc_reset(txq->qdisc);
 167                                                spin_unlock_bh(root_lock);
 168                                        }
 169                                }
 170                                skb_queue_purge(&dat->q);
 171                                teql_neigh_release(xchg(&dat->ncache, NULL));
 172                                break;
 173                        }
 174
 175                } while ((prev = q) != master->slaves);
 176        }
 177}
 178
 179static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 180{
 181        struct net_device *dev = qdisc_dev(sch);
 182        struct teql_master *m = (struct teql_master *)sch->ops;
 183        struct teql_sched_data *q = qdisc_priv(sch);
 184
 185        if (dev->hard_header_len > m->dev->hard_header_len)
 186                return -EINVAL;
 187
 188        if (m->dev == dev)
 189                return -ELOOP;
 190
 191        q->m = m;
 192
 193        skb_queue_head_init(&q->q);
 194
 195        if (m->slaves) {
 196                if (m->dev->flags & IFF_UP) {
 197                        if ((m->dev->flags & IFF_POINTOPOINT &&
 198                             !(dev->flags & IFF_POINTOPOINT)) ||
 199                            (m->dev->flags & IFF_BROADCAST &&
 200                             !(dev->flags & IFF_BROADCAST)) ||
 201                            (m->dev->flags & IFF_MULTICAST &&
 202                             !(dev->flags & IFF_MULTICAST)) ||
 203                            dev->mtu < m->dev->mtu)
 204                                return -EINVAL;
 205                } else {
 206                        if (!(dev->flags&IFF_POINTOPOINT))
 207                                m->dev->flags &= ~IFF_POINTOPOINT;
 208                        if (!(dev->flags&IFF_BROADCAST))
 209                                m->dev->flags &= ~IFF_BROADCAST;
 210                        if (!(dev->flags&IFF_MULTICAST))
 211                                m->dev->flags &= ~IFF_MULTICAST;
 212                        if (dev->mtu < m->dev->mtu)
 213                                m->dev->mtu = dev->mtu;
 214                }
 215                q->next = NEXT_SLAVE(m->slaves);
 216                NEXT_SLAVE(m->slaves) = sch;
 217        } else {
 218                q->next = sch;
 219                m->slaves = sch;
 220                m->dev->mtu = dev->mtu;
 221                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 222        }
 223        return 0;
 224}
 225
 226
 227static int
 228__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 229               struct net_device *dev, struct netdev_queue *txq,
 230               struct neighbour *mn)
 231{
 232        struct teql_sched_data *q = qdisc_priv(txq->qdisc);
 233        struct neighbour *n = q->ncache;
 234
 235        if (mn->tbl == NULL)
 236                return -EINVAL;
 237        if (n && n->tbl == mn->tbl &&
 238            memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
 239                atomic_inc(&n->refcnt);
 240        } else {
 241                n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
 242                if (IS_ERR(n))
 243                        return PTR_ERR(n);
 244        }
 245        if (neigh_event_send(n, skb_res) == 0) {
 246                int err;
 247                char haddr[MAX_ADDR_LEN];
 248
 249                neigh_ha_snapshot(haddr, n, dev);
 250                err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
 251                                      NULL, skb->len);
 252
 253                if (err < 0) {
 254                        neigh_release(n);
 255                        return -EINVAL;
 256                }
 257                teql_neigh_release(xchg(&q->ncache, n));
 258                return 0;
 259        }
 260        neigh_release(n);
 261        return (skb_res == NULL) ? -EAGAIN : 1;
 262}
 263
 264static inline int teql_resolve(struct sk_buff *skb,
 265                               struct sk_buff *skb_res,
 266                               struct net_device *dev,
 267                               struct netdev_queue *txq)
 268{
 269        struct dst_entry *dst = skb_dst(skb);
 270        struct neighbour *mn;
 271        int res;
 272
 273        if (txq->qdisc == &noop_qdisc)
 274                return -ENODEV;
 275
 276        if (!dev->header_ops || !dst)
 277                return 0;
 278
 279        rcu_read_lock();
 280        mn = dst_get_neighbour_noref(dst);
 281        res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
 282        rcu_read_unlock();
 283
 284        return res;
 285}
 286
 287static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
 288{
 289        struct teql_master *master = netdev_priv(dev);
 290        struct Qdisc *start, *q;
 291        int busy;
 292        int nores;
 293        int subq = skb_get_queue_mapping(skb);
 294        struct sk_buff *skb_res = NULL;
 295
 296        start = master->slaves;
 297
 298restart:
 299        nores = 0;
 300        busy = 0;
 301
 302        q = start;
 303        if (!q)
 304                goto drop;
 305
 306        do {
 307                struct net_device *slave = qdisc_dev(q);
 308                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
 309                const struct net_device_ops *slave_ops = slave->netdev_ops;
 310
 311                if (slave_txq->qdisc_sleeping != q)
 312                        continue;
 313                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 314                    !netif_running(slave)) {
 315                        busy = 1;
 316                        continue;
 317                }
 318
 319                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
 320                case 0:
 321                        if (__netif_tx_trylock(slave_txq)) {
 322                                unsigned int length = qdisc_pkt_len(skb);
 323
 324                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 325                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 326                                        txq_trans_update(slave_txq);
 327                                        __netif_tx_unlock(slave_txq);
 328                                        master->slaves = NEXT_SLAVE(q);
 329                                        netif_wake_queue(dev);
 330                                        master->tx_packets++;
 331                                        master->tx_bytes += length;
 332                                        return NETDEV_TX_OK;
 333                                }
 334                                __netif_tx_unlock(slave_txq);
 335                        }
 336                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 337                                busy = 1;
 338                        break;
 339                case 1:
 340                        master->slaves = NEXT_SLAVE(q);
 341                        return NETDEV_TX_OK;
 342                default:
 343                        nores = 1;
 344                        break;
 345                }
 346                __skb_pull(skb, skb_network_offset(skb));
 347        } while ((q = NEXT_SLAVE(q)) != start);
 348
 349        if (nores && skb_res == NULL) {
 350                skb_res = skb;
 351                goto restart;
 352        }
 353
 354        if (busy) {
 355                netif_stop_queue(dev);
 356                return NETDEV_TX_BUSY;
 357        }
 358        master->tx_errors++;
 359
 360drop:
 361        master->tx_dropped++;
 362        dev_kfree_skb(skb);
 363        return NETDEV_TX_OK;
 364}
 365
 366static int teql_master_open(struct net_device *dev)
 367{
 368        struct Qdisc *q;
 369        struct teql_master *m = netdev_priv(dev);
 370        int mtu = 0xFFFE;
 371        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 372
 373        if (m->slaves == NULL)
 374                return -EUNATCH;
 375
 376        flags = FMASK;
 377
 378        q = m->slaves;
 379        do {
 380                struct net_device *slave = qdisc_dev(q);
 381
 382                if (slave == NULL)
 383                        return -EUNATCH;
 384
 385                if (slave->mtu < mtu)
 386                        mtu = slave->mtu;
 387                if (slave->hard_header_len > LL_MAX_HEADER)
 388                        return -EINVAL;
 389
 390                /* If all the slaves are BROADCAST, master is BROADCAST
 391                   If all the slaves are PtP, master is PtP
 392                   Otherwise, master is NBMA.
 393                 */
 394                if (!(slave->flags&IFF_POINTOPOINT))
 395                        flags &= ~IFF_POINTOPOINT;
 396                if (!(slave->flags&IFF_BROADCAST))
 397                        flags &= ~IFF_BROADCAST;
 398                if (!(slave->flags&IFF_MULTICAST))
 399                        flags &= ~IFF_MULTICAST;
 400        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 401
 402        m->dev->mtu = mtu;
 403        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 404        netif_start_queue(m->dev);
 405        return 0;
 406}
 407
 408static int teql_master_close(struct net_device *dev)
 409{
 410        netif_stop_queue(dev);
 411        return 0;
 412}
 413
 414static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
 415                                                     struct rtnl_link_stats64 *stats)
 416{
 417        struct teql_master *m = netdev_priv(dev);
 418
 419        stats->tx_packets       = m->tx_packets;
 420        stats->tx_bytes         = m->tx_bytes;
 421        stats->tx_errors        = m->tx_errors;
 422        stats->tx_dropped       = m->tx_dropped;
 423        return stats;
 424}
 425
 426static int teql_master_mtu(struct net_device *dev, int new_mtu)
 427{
 428        struct teql_master *m = netdev_priv(dev);
 429        struct Qdisc *q;
 430
 431        if (new_mtu < 68)
 432                return -EINVAL;
 433
 434        q = m->slaves;
 435        if (q) {
 436                do {
 437                        if (new_mtu > qdisc_dev(q)->mtu)
 438                                return -EINVAL;
 439                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 440        }
 441
 442        dev->mtu = new_mtu;
 443        return 0;
 444}
 445
 446static const struct net_device_ops teql_netdev_ops = {
 447        .ndo_open       = teql_master_open,
 448        .ndo_stop       = teql_master_close,
 449        .ndo_start_xmit = teql_master_xmit,
 450        .ndo_get_stats64 = teql_master_stats64,
 451        .ndo_change_mtu = teql_master_mtu,
 452};
 453
 454static __init void teql_master_setup(struct net_device *dev)
 455{
 456        struct teql_master *master = netdev_priv(dev);
 457        struct Qdisc_ops *ops = &master->qops;
 458
 459        master->dev     = dev;
 460        ops->priv_size  = sizeof(struct teql_sched_data);
 461
 462        ops->enqueue    =       teql_enqueue;
 463        ops->dequeue    =       teql_dequeue;
 464        ops->peek       =       teql_peek;
 465        ops->init       =       teql_qdisc_init;
 466        ops->reset      =       teql_reset;
 467        ops->destroy    =       teql_destroy;
 468        ops->owner      =       THIS_MODULE;
 469
 470        dev->netdev_ops =       &teql_netdev_ops;
 471        dev->type               = ARPHRD_VOID;
 472        dev->mtu                = 1500;
 473        dev->tx_queue_len       = 100;
 474        dev->flags              = IFF_NOARP;
 475        dev->hard_header_len    = LL_MAX_HEADER;
 476        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 477}
 478
 479static LIST_HEAD(master_dev_list);
 480static int max_equalizers = 1;
 481module_param(max_equalizers, int, 0);
 482MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 483
 484static int __init teql_init(void)
 485{
 486        int i;
 487        int err = -ENODEV;
 488
 489        for (i = 0; i < max_equalizers; i++) {
 490                struct net_device *dev;
 491                struct teql_master *master;
 492
 493                dev = alloc_netdev(sizeof(struct teql_master),
 494                                  "teql%d", teql_master_setup);
 495                if (!dev) {
 496                        err = -ENOMEM;
 497                        break;
 498                }
 499
 500                if ((err = register_netdev(dev))) {
 501                        free_netdev(dev);
 502                        break;
 503                }
 504
 505                master = netdev_priv(dev);
 506
 507                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 508                err = register_qdisc(&master->qops);
 509
 510                if (err) {
 511                        unregister_netdev(dev);
 512                        free_netdev(dev);
 513                        break;
 514                }
 515
 516                list_add_tail(&master->master_list, &master_dev_list);
 517        }
 518        return i ? 0 : err;
 519}
 520
 521static void __exit teql_exit(void)
 522{
 523        struct teql_master *master, *nxt;
 524
 525        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 526
 527                list_del(&master->master_list);
 528
 529                unregister_qdisc(&master->qops);
 530                unregister_netdev(master->dev);
 531                free_netdev(master->dev);
 532        }
 533}
 534
 535module_init(teql_init);
 536module_exit(teql_exit);
 537
 538MODULE_LICENSE("GPL");
 539
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.