linux/net/sched/sch_teql.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
   3 *
   4 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
   5 */
   6
   7#include <linux/module.h>
   8#include <linux/types.h>
   9#include <linux/kernel.h>
  10#include <linux/slab.h>
  11#include <linux/string.h>
  12#include <linux/errno.h>
  13#include <linux/if_arp.h>
  14#include <linux/netdevice.h>
  15#include <linux/init.h>
  16#include <linux/skbuff.h>
  17#include <linux/moduleparam.h>
  18#include <net/dst.h>
  19#include <net/neighbour.h>
  20#include <net/pkt_sched.h>
  21
  22/*
  23   How to setup it.
  24   ----------------
  25
  26   After loading this module you will find a new device teqlN
  27   and new qdisc with the same name. To join a slave to the equalizer
  28   you should just set this qdisc on a device f.e.
  29
  30   # tc qdisc add dev eth0 root teql0
  31   # tc qdisc add dev eth1 root teql0
  32
  33   That's all. Full PnP 8)
  34
  35   Applicability.
  36   --------------
  37
  38   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
  39      signal and generate EOI events. If you want to equalize virtual devices
  40      like tunnels, use a normal eql device.
  41   2. This device puts no limitations on physical slave characteristics
  42      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
  43      Certainly, large difference in link speeds will make the resulting
  44      equalized link unusable, because of huge packet reordering.
  45      I estimate an upper useful difference as ~10 times.
  46   3. If the slave requires address resolution, only protocols using
  47      neighbour cache (IPv4/IPv6) will work over the equalized link.
  48      Other protocols are still allowed to use the slave device directly,
  49      which will not break load balancing, though native slave
  50      traffic will have the highest priority.  */
  51
/*
 * Per-equalizer state, stored in the private area of the teqlN net_device.
 */
struct teql_master {
	/* Must stay the first member: teql_qdisc_init() casts sch->ops
	 * back to a struct teql_master pointer.
	 */
	struct Qdisc_ops qops;
	/* The teqlN master device this state belongs to. */
	struct net_device *dev;
	/* Current position in the circular list of slave qdiscs,
	 * or NULL when no slave is attached.
	 */
	struct Qdisc *slaves;
	/* Link in master_dev_list. */
	struct list_head master_list;
	/* Counters reported through teql_master_stats64(). */
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};
  62
/*
 * Private data of one teql qdisc instance (one per slave device).
 */
struct teql_sched_data {
	/* Next slave qdisc in the master's circular list. */
	struct Qdisc *next;
	/* Master this slave is attached to; assigned in teql_qdisc_init(). */
	struct teql_master *m;
	/* Packets queued on this slave. */
	struct sk_buff_head q;
};
  68
/* Lvalue for the "next" link of slave qdisc @q in the circular slave list. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device copies from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  72
  73/* "teql*" qdisc routines */
  74
  75static int
  76teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
  77{
  78        struct net_device *dev = qdisc_dev(sch);
  79        struct teql_sched_data *q = qdisc_priv(sch);
  80
  81        if (q->q.qlen < dev->tx_queue_len) {
  82                __skb_queue_tail(&q->q, skb);
  83                return NET_XMIT_SUCCESS;
  84        }
  85
  86        return qdisc_drop(skb, sch, to_free);
  87}
  88
  89static struct sk_buff *
  90teql_dequeue(struct Qdisc *sch)
  91{
  92        struct teql_sched_data *dat = qdisc_priv(sch);
  93        struct netdev_queue *dat_queue;
  94        struct sk_buff *skb;
  95        struct Qdisc *q;
  96
  97        skb = __skb_dequeue(&dat->q);
  98        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
  99        q = rcu_dereference_bh(dat_queue->qdisc);
 100
 101        if (skb == NULL) {
 102                struct net_device *m = qdisc_dev(q);
 103                if (m) {
 104                        dat->m->slaves = sch;
 105                        netif_wake_queue(m);
 106                }
 107        } else {
 108                qdisc_bstats_update(sch, skb);
 109        }
 110        sch->q.qlen = dat->q.qlen + q->q.qlen;
 111        return skb;
 112}
 113
 114static struct sk_buff *
 115teql_peek(struct Qdisc *sch)
 116{
 117        /* teql is meant to be used as root qdisc */
 118        return NULL;
 119}
 120
 121static void
 122teql_reset(struct Qdisc *sch)
 123{
 124        struct teql_sched_data *dat = qdisc_priv(sch);
 125
 126        skb_queue_purge(&dat->q);
 127}
 128
/*
 * Destroy operation: unlink @sch from its master's circular slave list.
 *
 * The list is walked from master->slaves looking for the entry whose
 * successor is @sch.  If @sch is found it is unlinked and its private
 * packet list is purged.  If @sch was also the only slave, master->slaves
 * is cleared and the qdisc on the master device's queue 0 is reset under
 * its root lock.  If @sch is not on the list, nothing is changed.
 */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* dat->m is only assigned by teql_qdisc_init() after its early
	 * validity checks, so it may still be unset here.
	 */
	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Bypass sch in the ring. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					/* sch was the current slave: advance the
					 * master's pointer past it.
					 */
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						/* sch pointed at itself, i.e. it was
						 * the only slave: the ring is now empty.
						 */
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		/* Stop after one full trip around the ring. */
		} while ((prev = q) != master->slaves);
	}
}
 167
 168static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
 169                           struct netlink_ext_ack *extack)
 170{
 171        struct net_device *dev = qdisc_dev(sch);
 172        struct teql_master *m = (struct teql_master *)sch->ops;
 173        struct teql_sched_data *q = qdisc_priv(sch);
 174
 175        if (dev->hard_header_len > m->dev->hard_header_len)
 176                return -EINVAL;
 177
 178        if (m->dev == dev)
 179                return -ELOOP;
 180
 181        q->m = m;
 182
 183        skb_queue_head_init(&q->q);
 184
 185        if (m->slaves) {
 186                if (m->dev->flags & IFF_UP) {
 187                        if ((m->dev->flags & IFF_POINTOPOINT &&
 188                             !(dev->flags & IFF_POINTOPOINT)) ||
 189                            (m->dev->flags & IFF_BROADCAST &&
 190                             !(dev->flags & IFF_BROADCAST)) ||
 191                            (m->dev->flags & IFF_MULTICAST &&
 192                             !(dev->flags & IFF_MULTICAST)) ||
 193                            dev->mtu < m->dev->mtu)
 194                                return -EINVAL;
 195                } else {
 196                        if (!(dev->flags&IFF_POINTOPOINT))
 197                                m->dev->flags &= ~IFF_POINTOPOINT;
 198                        if (!(dev->flags&IFF_BROADCAST))
 199                                m->dev->flags &= ~IFF_BROADCAST;
 200                        if (!(dev->flags&IFF_MULTICAST))
 201                                m->dev->flags &= ~IFF_MULTICAST;
 202                        if (dev->mtu < m->dev->mtu)
 203                                m->dev->mtu = dev->mtu;
 204                }
 205                q->next = NEXT_SLAVE(m->slaves);
 206                NEXT_SLAVE(m->slaves) = sch;
 207        } else {
 208                q->next = sch;
 209                m->slaves = sch;
 210                m->dev->mtu = dev->mtu;
 211                m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
 212        }
 213        return 0;
 214}
 215
 216
 217static int
 218__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
 219               struct net_device *dev, struct netdev_queue *txq,
 220               struct dst_entry *dst)
 221{
 222        struct neighbour *n;
 223        int err = 0;
 224
 225        n = dst_neigh_lookup_skb(dst, skb);
 226        if (!n)
 227                return -ENOENT;
 228
 229        if (dst->dev != dev) {
 230                struct neighbour *mn;
 231
 232                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
 233                neigh_release(n);
 234                if (IS_ERR(mn))
 235                        return PTR_ERR(mn);
 236                n = mn;
 237        }
 238
 239        if (neigh_event_send(n, skb_res) == 0) {
 240                int err;
 241                char haddr[MAX_ADDR_LEN];
 242
 243                neigh_ha_snapshot(haddr, n, dev);
 244                err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
 245                                      haddr, NULL, skb->len);
 246
 247                if (err < 0)
 248                        err = -EINVAL;
 249        } else {
 250                err = (skb_res == NULL) ? -EAGAIN : 1;
 251        }
 252        neigh_release(n);
 253        return err;
 254}
 255
 256static inline int teql_resolve(struct sk_buff *skb,
 257                               struct sk_buff *skb_res,
 258                               struct net_device *dev,
 259                               struct netdev_queue *txq)
 260{
 261        struct dst_entry *dst = skb_dst(skb);
 262        int res;
 263
 264        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
 265                return -ENODEV;
 266
 267        if (!dev->header_ops || !dst)
 268                return 0;
 269
 270        rcu_read_lock();
 271        res = __teql_resolve(skb, skb_res, dev, txq, dst);
 272        rcu_read_unlock();
 273
 274        return res;
 275}
 276
/*
 * Transmit handler of the teql master device.
 *
 * Walks the circular slave list once, starting at master->slaves, and hands
 * @skb to the first slave that accepts it.  If no slave took the packet and
 * at least one slave failed address resolution, the walk is repeated once
 * with skb_res set to @skb, so that teql_resolve() passes the packet itself
 * to the neighbour code.
 *
 * Returns NETDEV_TX_OK when the packet was sent, handed over for
 * resolution, or dropped, and NETDEV_TX_BUSY (with the master queue
 * stopped) when some slave was busy.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;	/* no slaves attached */

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Skip a slave whose queue 0 no longer has q as its sleeping
		 * qdisc.  "continue" in a do-while jumps to the loop
		 * condition, so q still advances to the next slave.
		 */
		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			/* Resolved: try to transmit directly on the slave. */
			if (__netif_tx_trylock(slave_txq)) {
				/* Read the length first; skb is not touched
				 * again after a successful transmit.
				 */
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					/* Next packet starts at the following slave. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* Only returned on the second pass (skb_res == skb).
			 * The packet is not freed here; presumably the
			 * neighbour code now holds it -- confirm against
			 * neigh_event_send().
			 */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			/* Resolution failed on this slave. */
			nores = 1;
			break;
		}
		/* Pull the data pointer back to the network header before
		 * trying the next slave (presumably undoing any link-layer
		 * header built for this one).
		 */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	/* First pass ended with an unresolved slave: retry once, this time
	 * passing the packet itself for resolution.
	 */
	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
 354
 355static int teql_master_open(struct net_device *dev)
 356{
 357        struct Qdisc *q;
 358        struct teql_master *m = netdev_priv(dev);
 359        int mtu = 0xFFFE;
 360        unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 361
 362        if (m->slaves == NULL)
 363                return -EUNATCH;
 364
 365        flags = FMASK;
 366
 367        q = m->slaves;
 368        do {
 369                struct net_device *slave = qdisc_dev(q);
 370
 371                if (slave == NULL)
 372                        return -EUNATCH;
 373
 374                if (slave->mtu < mtu)
 375                        mtu = slave->mtu;
 376                if (slave->hard_header_len > LL_MAX_HEADER)
 377                        return -EINVAL;
 378
 379                /* If all the slaves are BROADCAST, master is BROADCAST
 380                   If all the slaves are PtP, master is PtP
 381                   Otherwise, master is NBMA.
 382                 */
 383                if (!(slave->flags&IFF_POINTOPOINT))
 384                        flags &= ~IFF_POINTOPOINT;
 385                if (!(slave->flags&IFF_BROADCAST))
 386                        flags &= ~IFF_BROADCAST;
 387                if (!(slave->flags&IFF_MULTICAST))
 388                        flags &= ~IFF_MULTICAST;
 389        } while ((q = NEXT_SLAVE(q)) != m->slaves);
 390
 391        m->dev->mtu = mtu;
 392        m->dev->flags = (m->dev->flags&~FMASK) | flags;
 393        netif_start_queue(m->dev);
 394        return 0;
 395}
 396
 397static int teql_master_close(struct net_device *dev)
 398{
 399        netif_stop_queue(dev);
 400        return 0;
 401}
 402
 403static void teql_master_stats64(struct net_device *dev,
 404                                struct rtnl_link_stats64 *stats)
 405{
 406        struct teql_master *m = netdev_priv(dev);
 407
 408        stats->tx_packets       = m->tx_packets;
 409        stats->tx_bytes         = m->tx_bytes;
 410        stats->tx_errors        = m->tx_errors;
 411        stats->tx_dropped       = m->tx_dropped;
 412}
 413
 414static int teql_master_mtu(struct net_device *dev, int new_mtu)
 415{
 416        struct teql_master *m = netdev_priv(dev);
 417        struct Qdisc *q;
 418
 419        q = m->slaves;
 420        if (q) {
 421                do {
 422                        if (new_mtu > qdisc_dev(q)->mtu)
 423                                return -EINVAL;
 424                } while ((q = NEXT_SLAVE(q)) != m->slaves);
 425        }
 426
 427        dev->mtu = new_mtu;
 428        return 0;
 429}
 430
/* net_device operations of the teqlN master device. */
static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};
 438
 439static __init void teql_master_setup(struct net_device *dev)
 440{
 441        struct teql_master *master = netdev_priv(dev);
 442        struct Qdisc_ops *ops = &master->qops;
 443
 444        master->dev     = dev;
 445        ops->priv_size  = sizeof(struct teql_sched_data);
 446
 447        ops->enqueue    =       teql_enqueue;
 448        ops->dequeue    =       teql_dequeue;
 449        ops->peek       =       teql_peek;
 450        ops->init       =       teql_qdisc_init;
 451        ops->reset      =       teql_reset;
 452        ops->destroy    =       teql_destroy;
 453        ops->owner      =       THIS_MODULE;
 454
 455        dev->netdev_ops =       &teql_netdev_ops;
 456        dev->type               = ARPHRD_VOID;
 457        dev->mtu                = 1500;
 458        dev->min_mtu            = 68;
 459        dev->max_mtu            = 65535;
 460        dev->tx_queue_len       = 100;
 461        dev->flags              = IFF_NOARP;
 462        dev->hard_header_len    = LL_MAX_HEADER;
 463        netif_keep_dst(dev);
 464}
 465
/* All masters created by teql_init(); walked by teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices teql_init() tries to create. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
 470
 471static int __init teql_init(void)
 472{
 473        int i;
 474        int err = -ENODEV;
 475
 476        for (i = 0; i < max_equalizers; i++) {
 477                struct net_device *dev;
 478                struct teql_master *master;
 479
 480                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
 481                                   NET_NAME_UNKNOWN, teql_master_setup);
 482                if (!dev) {
 483                        err = -ENOMEM;
 484                        break;
 485                }
 486
 487                if ((err = register_netdev(dev))) {
 488                        free_netdev(dev);
 489                        break;
 490                }
 491
 492                master = netdev_priv(dev);
 493
 494                strlcpy(master->qops.id, dev->name, IFNAMSIZ);
 495                err = register_qdisc(&master->qops);
 496
 497                if (err) {
 498                        unregister_netdev(dev);
 499                        free_netdev(dev);
 500                        break;
 501                }
 502
 503                list_add_tail(&master->master_list, &master_dev_list);
 504        }
 505        return i ? 0 : err;
 506}
 507
 508static void __exit teql_exit(void)
 509{
 510        struct teql_master *master, *nxt;
 511
 512        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
 513
 514                list_del(&master->master_list);
 515
 516                unregister_qdisc(&master->qops);
 517                unregister_netdev(master->dev);
 518                free_netdev(master->dev);
 519        }
 520}
 521
 522module_init(teql_init);
 523module_exit(teql_exit);
 524
 525MODULE_LICENSE("GPL");
 526