linux/net/sched/sch_cbq.c
   1/*
   2 * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/slab.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/string.h>
  18#include <linux/errno.h>
  19#include <linux/skbuff.h>
  20#include <net/netlink.h>
  21#include <net/pkt_sched.h>
  22
  23
  24/*      Class-Based Queueing (CBQ) algorithm.
  25        =======================================
  26
  27        Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28                 Management Models for Packet Networks",
  29                 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31                 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33                 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34                 Parameters", 1996
  35
  36                 [4] Sally Floyd and Michael Speer, "Experimental Results
  37                 for Class-Based Queueing", 1998, not published.
  38
  39        -----------------------------------------------------------------------
  40
  41        Algorithm skeleton was taken from NS simulator cbq.cc.
  42        If someone wants to check this code against the LBL version,
  43        they should take into account that ONLY the skeleton was borrowed;
  44        the implementation is different. In particular:
  45
  46        --- The WRR algorithm is different. Our version looks more
  47        reasonable (I hope) and works when quanta are allowed to be
  48        less than MTU, which is always the case when real-time classes
  49        have small rates. Note that the statement of [3] is
  50        incomplete: delay may actually be estimated even if the class
  51        per-round allotment is less than MTU. Namely, if the per-round
  52        allotment is W*r_i, and r_1+...+r_k = r < 1, then
  53
  54        delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56        In the worst case we have IntServ estimate with D = W*r+k*MTU
  57        and C = MTU*r. The proof (if correct at all) is trivial.
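
        For illustration (an example plugged into the bound above, not taken
        from the papers): with MTU = 1500 bytes, k = 2 classes with
        r_1 = r_2 = 0.25 (so r = 0.5) and W chosen so that the per-round
        allotment W*r_i = 500 bytes < MTU (i.e. W = 2000 bytes), the bound is

        delay_i <= ([1500/500]*1000 + 1000 + 2*1500)/B = 7000 bytes / B,

        roughly 56 ms on a 1 Mbit/s link.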
  58
  59
  60        --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61        interpret some places, which look like incorrect translations
  62        from NS. Anyone is welcome to find these differences
  63        and explain to me why I am wrong 8).
  64
  65        --- Linux has no EOI event, so we cannot estimate true class
  66        idle time. The workaround is to treat the next dequeue event
  67        as a sign that the previous packet has finished. This is wrong because
  68        of internal device queueing, but on a permanently loaded link it holds.
  69        Moreover, combined with the clock integrator, this scheme comes
  70        very close to an ideal solution.  */
  71
  72struct cbq_sched_data;
  73
  74
  75struct cbq_class
  76{
  77        struct Qdisc_class_common common;
  78        struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  79
  80/* Parameters */
  81        unsigned char           priority;       /* class priority */
  82        unsigned char           priority2;      /* priority to be used after overlimit */
  83        unsigned char           ewma_log;       /* time constant for idle time calculation */
  84        unsigned char           ovl_strategy;
  85#ifdef CONFIG_NET_CLS_ACT
  86        unsigned char           police;
  87#endif
  88
  89        u32                     defmap;
  90
  91        /* Link-sharing scheduler parameters */
  92        long                    maxidle;        /* Class parameters: see below. */
  93        long                    offtime;
  94        long                    minidle;
  95        u32                     avpkt;
  96        struct qdisc_rate_table *R_tab;
  97
  98        /* Overlimit strategy parameters */
  99        void                    (*overlimit)(struct cbq_class *cl);
 100        psched_tdiff_t          penalty;
 101
 102        /* General scheduler (WRR) parameters */
 103        long                    allot;
 104        long                    quantum;        /* Allotment per WRR round */
 105        long                    weight;         /* Relative allotment: see below */
 106
 107        struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 108        struct cbq_class        *split;         /* Ptr to split node */
 109        struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 110        struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 111        struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 112                                                   parent otherwise */
 113        struct cbq_class        *sibling;       /* Sibling chain */
 114        struct cbq_class        *children;      /* Pointer to children chain */
 115
 116        struct Qdisc            *q;             /* Elementary queueing discipline */
 117
 118
 119/* Variables */
 120        unsigned char           cpriority;      /* Effective priority */
 121        unsigned char           delayed;
 122        unsigned char           level;          /* level of the class in hierarchy:
 123                                                   0 for leaf classes, and maximal
 124                                                   level of children + 1 for nodes.
 125                                                 */
 126
 127        psched_time_t           last;           /* Last end of service */
 128        psched_time_t           undertime;
 129        long                    avgidle;
 130        long                    deficit;        /* Saved deficit for WRR */
 131        psched_time_t           penalized;
 132        struct gnet_stats_basic_packed bstats;
 133        struct gnet_stats_queue qstats;
 134        struct gnet_stats_rate_est rate_est;
 135        struct tc_cbq_xstats    xstats;
 136
 137        struct tcf_proto        *filter_list;
 138
 139        int                     refcnt;
 140        int                     filters;
 141
 142        struct cbq_class        *defaults[TC_PRIO_MAX+1];
 143};
 144
 145struct cbq_sched_data
 146{
 147        struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
 148        int                     nclasses[TC_CBQ_MAXPRIO+1];
 149        unsigned                quanta[TC_CBQ_MAXPRIO+1];
 150
 151        struct cbq_class        link;
 152
 153        unsigned                activemask;
 154        struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 155                                                                   with backlog */
 156
 157#ifdef CONFIG_NET_CLS_ACT
 158        struct cbq_class        *rx_class;
 159#endif
 160        struct cbq_class        *tx_class;
 161        struct cbq_class        *tx_borrowed;
 162        int                     tx_len;
 163        psched_time_t           now;            /* Cached timestamp */
 164        psched_time_t           now_rt;         /* Cached real time */
 165        unsigned                pmask;
 166
 167        struct hrtimer          delay_timer;
 168        struct qdisc_watchdog   watchdog;       /* Watchdog timer,
 169                                                   started when CBQ has
 170                                                   backlog, but cannot
 171                                                   transmit just now */
 172        psched_tdiff_t          wd_expires;
 173        int                     toplevel;
 174        u32                     hgenerator;
 175};
 176
 177
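/* L2T ("length to time"): the number of scheduler ticks needed to send
 * 'len' bytes at this class's configured rate, looked up in the rate
 * table supplied by userspace.
 */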
 178#define L2T(cl,len)     qdisc_l2t((cl)->R_tab,len)
 179
 180static __inline__ struct cbq_class *
 181cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 182{
 183        struct Qdisc_class_common *clc;
 184
 185        clc = qdisc_class_find(&q->clhash, classid);
 186        if (clc == NULL)
 187                return NULL;
 188        return container_of(clc, struct cbq_class, common);
 189}
 190
 191#ifdef CONFIG_NET_CLS_ACT
 192
 193static struct cbq_class *
 194cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 195{
 196        struct cbq_class *cl, *new;
 197
 198        for (cl = this->tparent; cl; cl = cl->tparent)
 199                if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 200                        return new;
 201
 202        return NULL;
 203}
 204
 205#endif
 206
 207/* Classify packet. The procedure is pretty complicated, but
 208   it allows us to combine link sharing and priority scheduling
 209   transparently.
 210
 211   Namely, you can put link-sharing rules (e.g. route-based) at the root
 212   of CBQ, so that they resolve to split nodes. Then packets are classified
 213   by logical priority, or a more specific classifier may be attached
 214   to the split node.
 215 */
 216
 217static struct cbq_class *
 218cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219{
 220        struct cbq_sched_data *q = qdisc_priv(sch);
 221        struct cbq_class *head = &q->link;
 222        struct cbq_class **defmap;
 223        struct cbq_class *cl = NULL;
 224        u32 prio = skb->priority;
 225        struct tcf_result res;
 226
 227        /*
 228         *  Step 1. If skb->priority points to one of our classes, use it.
 229         */
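        /* TC_H_MAJ(prio ^ sch->handle) == 0 means the major part of
         * skb->priority equals this qdisc's handle, i.e. the priority was
         * set directly to one of our class IDs (e.g. via SO_PRIORITY), so
         * a plain hash lookup can resolve it.
         */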
 230        if (TC_H_MAJ(prio^sch->handle) == 0 &&
 231            (cl = cbq_class_lookup(q, prio)) != NULL)
 232                return cl;
 233
 234        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235        for (;;) {
 236                int result = 0;
 237                defmap = head->defaults;
 238
 239                /*
 240                 * Step 2+n. Apply classifier.
 241                 */
 242                if (!head->filter_list ||
 243                    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244                        goto fallback;
 245
 246                if ((cl = (void*)res.class) == NULL) {
 247                        if (TC_H_MAJ(res.classid))
 248                                cl = cbq_class_lookup(q, res.classid);
 249                        else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 250                                cl = defmap[TC_PRIO_BESTEFFORT];
 251
 252                        if (cl == NULL || cl->level >= head->level)
 253                                goto fallback;
 254                }
 255
 256#ifdef CONFIG_NET_CLS_ACT
 257                switch (result) {
 258                case TC_ACT_QUEUED:
 259                case TC_ACT_STOLEN:
 260                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 261                case TC_ACT_SHOT:
 262                        return NULL;
 263                case TC_ACT_RECLASSIFY:
 264                        return cbq_reclassify(skb, cl);
 265                }
 266#endif
 267                if (cl->level == 0)
 268                        return cl;
 269
 270                /*
 271                 * Step 3+n. If the classifier selected a link-sharing class,
 272                 *         apply the agency-specific classifier.
 273                 *         Repeat this procedure until we hit a leaf node.
 274                 */
 275                head = cl;
 276        }
 277
 278fallback:
 279        cl = head;
 280
 281        /*
 282         * Step 4. No success...
 283         */
 284        if (TC_H_MAJ(prio) == 0 &&
 285            !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 286            !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 287                return head;
 288
 289        return cl;
 290}
 291
 292/*
 293   A packet has just been enqueued on an empty class.
 294   cbq_activate_class adds the class to the tail of the active class list
 295   of its priority band.
 296 */
 297
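/* The active list of each band is a circular, singly linked list:
 * q->active[prio] points at the tail and tail->next_alive is the head.
 */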
 298static __inline__ void cbq_activate_class(struct cbq_class *cl)
 299{
 300        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 301        int prio = cl->cpriority;
 302        struct cbq_class *cl_tail;
 303
 304        cl_tail = q->active[prio];
 305        q->active[prio] = cl;
 306
 307        if (cl_tail != NULL) {
 308                cl->next_alive = cl_tail->next_alive;
 309                cl_tail->next_alive = cl;
 310        } else {
 311                cl->next_alive = cl;
 312                q->activemask |= (1<<prio);
 313        }
 314}
 315
 316/*
 317   Unlink the class from the active chain.
 318   Note that the same operation is also done directly in cbq_dequeue*
 319   during the round-robin procedure.
 320 */
 321
 322static void cbq_deactivate_class(struct cbq_class *this)
 323{
 324        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 325        int prio = this->cpriority;
 326        struct cbq_class *cl;
 327        struct cbq_class *cl_prev = q->active[prio];
 328
 329        do {
 330                cl = cl_prev->next_alive;
 331                if (cl == this) {
 332                        cl_prev->next_alive = cl->next_alive;
 333                        cl->next_alive = NULL;
 334
 335                        if (cl == q->active[prio]) {
 336                                q->active[prio] = cl_prev;
 337                                if (cl == q->active[prio]) {
 338                                        q->active[prio] = NULL;
 339                                        q->activemask &= ~(1<<prio);
 340                                        return;
 341                                }
 342                        }
 343                        return;
 344                }
 345        } while ((cl_prev = cl) != q->active[prio]);
 346}
 347
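/* q->toplevel implements the "top level" link-sharing guideline from [1]:
 * borrowing is only allowed from ancestors whose level does not exceed
 * q->toplevel. On enqueue we lower toplevel to the level of the first
 * underlimit ancestor of the target class.
 */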
 348static void
 349cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 350{
 351        int toplevel = q->toplevel;
 352
 353        if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 354                psched_time_t now;
 355                psched_tdiff_t incr;
 356
 357                now = psched_get_time();
 358                incr = now - q->now_rt;
 359                now = q->now + incr;
 360
 361                do {
 362                        if (cl->undertime < now) {
 363                                q->toplevel = cl->level;
 364                                return;
 365                        }
 366                } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 367        }
 368}
 369
 370static int
 371cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 372{
 373        struct cbq_sched_data *q = qdisc_priv(sch);
 374        int uninitialized_var(ret);
 375        struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 376
 377#ifdef CONFIG_NET_CLS_ACT
 378        q->rx_class = cl;
 379#endif
 380        if (cl == NULL) {
 381                if (ret & __NET_XMIT_BYPASS)
 382                        sch->qstats.drops++;
 383                kfree_skb(skb);
 384                return ret;
 385        }
 386
 387#ifdef CONFIG_NET_CLS_ACT
 388        cl->q->__parent = sch;
 389#endif
 390        ret = qdisc_enqueue(skb, cl->q);
 391        if (ret == NET_XMIT_SUCCESS) {
 392                sch->q.qlen++;
 393                sch->bstats.packets++;
 394                sch->bstats.bytes += qdisc_pkt_len(skb);
 395                cbq_mark_toplevel(q, cl);
 396                if (!cl->next_alive)
 397                        cbq_activate_class(cl);
 398                return ret;
 399        }
 400
 401        if (net_xmit_drop_count(ret)) {
 402                sch->qstats.drops++;
 403                cbq_mark_toplevel(q, cl);
 404                cl->qstats.drops++;
 405        }
 406        return ret;
 407}
 408
 409/* Overlimit actions */
 410
 411/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 412
 413static void cbq_ovl_classic(struct cbq_class *cl)
 414{
 415        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 416        psched_tdiff_t delay = cl->undertime - q->now;
 417
 418        if (!cl->delayed) {
 419                delay += cl->offtime;
 420
 421                /*
 422                   The class goes to sleep, so it will have no
 423                   chance to work off its avgidle. Let's forgive it 8)
 424
 425                   BTW cbq-2.0 has a bug in this
 426                   place: apparently they forgot to shift it by cl->ewma_log.
 427                 */
 428                if (cl->avgidle < 0)
 429                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 430                if (cl->avgidle < cl->minidle)
 431                        cl->avgidle = cl->minidle;
 432                if (delay <= 0)
 433                        delay = 1;
 434                cl->undertime = q->now + delay;
 435
 436                cl->xstats.overactions++;
 437                cl->delayed = 1;
 438        }
 439        if (q->wd_expires == 0 || q->wd_expires > delay)
 440                q->wd_expires = delay;
 441
 442        /* Dirty work! We must schedule wakeups based on the
 443           real available rate, rather than the leaf rate,
 444           which may be tiny (even zero).
 445         */
 446        if (q->toplevel == TC_CBQ_MAXLEVEL) {
 447                struct cbq_class *b;
 448                psched_tdiff_t base_delay = q->wd_expires;
 449
 450                for (b = cl->borrow; b; b = b->borrow) {
 451                        delay = b->undertime - q->now;
 452                        if (delay < base_delay) {
 453                                if (delay <= 0)
 454                                        delay = 1;
 455                                base_delay = delay;
 456                        }
 457                }
 458
 459                q->wd_expires = base_delay;
 460        }
 461}
 462
 463/* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
 464   when they go overlimit
 465 */
 466
 467static void cbq_ovl_rclassic(struct cbq_class *cl)
 468{
 469        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 470        struct cbq_class *this = cl;
 471
 472        do {
 473                if (cl->level > q->toplevel) {
 474                        cl = NULL;
 475                        break;
 476                }
 477        } while ((cl = cl->borrow) != NULL);
 478
 479        if (cl == NULL)
 480                cl = this;
 481        cbq_ovl_classic(cl);
 482}
 483
 484/* TC_CBQ_OVL_DELAY: delay the class until it becomes underlimit */
 485
 486static void cbq_ovl_delay(struct cbq_class *cl)
 487{
 488        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 489        psched_tdiff_t delay = cl->undertime - q->now;
 490
 491        if (test_bit(__QDISC_STATE_DEACTIVATED,
 492                     &qdisc_root_sleeping(cl->qdisc)->state))
 493                return;
 494
 495        if (!cl->delayed) {
 496                psched_time_t sched = q->now;
 497                ktime_t expires;
 498
 499                delay += cl->offtime;
 500                if (cl->avgidle < 0)
 501                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 502                if (cl->avgidle < cl->minidle)
 503                        cl->avgidle = cl->minidle;
 504                cl->undertime = q->now + delay;
 505
 506                if (delay > 0) {
 507                        sched += delay + cl->penalty;
 508                        cl->penalized = sched;
 509                        cl->cpriority = TC_CBQ_MAXPRIO;
 510                        q->pmask |= (1<<TC_CBQ_MAXPRIO);
 511
 512                        expires = ktime_set(0, 0);
 513                        expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 514                        if (hrtimer_try_to_cancel(&q->delay_timer) &&
 515                            ktime_to_ns(ktime_sub(
 516                                        hrtimer_get_expires(&q->delay_timer),
 517                                        expires)) > 0)
 518                                hrtimer_set_expires(&q->delay_timer, expires);
 519                        hrtimer_restart(&q->delay_timer);
 520                        cl->delayed = 1;
 521                        cl->xstats.overactions++;
 522                        return;
 523                }
 524                delay = 1;
 525        }
 526        if (q->wd_expires == 0 || q->wd_expires > delay)
 527                q->wd_expires = delay;
 528}
 529
 530/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 531
 532static void cbq_ovl_lowprio(struct cbq_class *cl)
 533{
 534        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 535
 536        cl->penalized = q->now + cl->penalty;
 537
 538        if (cl->cpriority != cl->priority2) {
 539                cl->cpriority = cl->priority2;
 540                q->pmask |= (1<<cl->cpriority);
 541                cl->xstats.overactions++;
 542        }
 543        cbq_ovl_classic(cl);
 544}
 545
 546/* TC_CBQ_OVL_DROP: penalize class by dropping */
 547
 548static void cbq_ovl_drop(struct cbq_class *cl)
 549{
 550        if (cl->q->ops->drop)
 551                if (cl->q->ops->drop(cl->q))
 552                        cl->qdisc->q.qlen--;
 553        cl->xstats.overactions++;
 554        cbq_ovl_classic(cl);
 555}
 556
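/* cbq_undelay_prio(): scan the active list of one (penalty) band and
 * re-activate every class whose penalty interval has expired, restoring
 * its normal priority. The return value is used by cbq_undelay() to decide
 * when the delay timer must fire again for this band.
 */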
 557static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 558                                       psched_time_t now)
 559{
 560        struct cbq_class *cl;
 561        struct cbq_class *cl_prev = q->active[prio];
 562        psched_time_t sched = now;
 563
 564        if (cl_prev == NULL)
 565                return 0;
 566
 567        do {
 568                cl = cl_prev->next_alive;
 569                if (now - cl->penalized > 0) {
 570                        cl_prev->next_alive = cl->next_alive;
 571                        cl->next_alive = NULL;
 572                        cl->cpriority = cl->priority;
 573                        cl->delayed = 0;
 574                        cbq_activate_class(cl);
 575
 576                        if (cl == q->active[prio]) {
 577                                q->active[prio] = cl_prev;
 578                                if (cl == q->active[prio]) {
 579                                        q->active[prio] = NULL;
 580                                        return 0;
 581                                }
 582                        }
 583
 584                        cl = cl_prev->next_alive;
 585                } else if (sched - cl->penalized > 0)
 586                        sched = cl->penalized;
 587        } while ((cl_prev = cl) != q->active[prio]);
 588
 589        return sched - now;
 590}
 591
 592static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 593{
 594        struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 595                                                delay_timer);
 596        struct Qdisc *sch = q->watchdog.qdisc;
 597        psched_time_t now;
 598        psched_tdiff_t delay = 0;
 599        unsigned pmask;
 600
 601        now = psched_get_time();
 602
 603        pmask = q->pmask;
 604        q->pmask = 0;
 605
 606        while (pmask) {
 607                int prio = ffz(~pmask);
 608                psched_tdiff_t tmp;
 609
 610                pmask &= ~(1<<prio);
 611
 612                tmp = cbq_undelay_prio(q, prio, now);
 613                if (tmp > 0) {
 614                        q->pmask |= 1<<prio;
 615                        if (tmp < delay || delay == 0)
 616                                delay = tmp;
 617                }
 618        }
 619
 620        if (delay) {
 621                ktime_t time;
 622
 623                time = ktime_set(0, 0);
 624                time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 625                hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 626        }
 627
 628        sch->flags &= ~TCQ_F_THROTTLED;
 629        __netif_schedule(qdisc_root(sch));
 630        return HRTIMER_NORESTART;
 631}
 632
 633#ifdef CONFIG_NET_CLS_ACT
 634static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 635{
 636        struct Qdisc *sch = child->__parent;
 637        struct cbq_sched_data *q = qdisc_priv(sch);
 638        struct cbq_class *cl = q->rx_class;
 639
 640        q->rx_class = NULL;
 641
 642        if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 643                int ret;
 644
 645                cbq_mark_toplevel(q, cl);
 646
 647                q->rx_class = cl;
 648                cl->q->__parent = sch;
 649
 650                ret = qdisc_enqueue(skb, cl->q);
 651                if (ret == NET_XMIT_SUCCESS) {
 652                        sch->q.qlen++;
 653                        sch->bstats.packets++;
 654                        sch->bstats.bytes += qdisc_pkt_len(skb);
 655                        if (!cl->next_alive)
 656                                cbq_activate_class(cl);
 657                        return 0;
 658                }
 659                if (net_xmit_drop_count(ret))
 660                        sch->qstats.drops++;
 661                return 0;
 662        }
 663
 664        sch->qstats.drops++;
 665        return -1;
 666}
 667#endif
 668
 669/*
 670   This is a mission-critical procedure.
 671
 672   We "regenerate" the toplevel cutoff if the transmitting class
 673   has backlog and is not regulated. This is not part of the
 674   original CBQ description, but it looks more reasonable.
 675   It may well be wrong; the question needs further investigation.
 676*/
 677
 678static __inline__ void
 679cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 680                    struct cbq_class *borrowed)
 681{
 682        if (cl && q->toplevel >= borrowed->level) {
 683                if (cl->q->q.qlen > 1) {
 684                        do {
 685                                if (borrowed->undertime == PSCHED_PASTPERFECT) {
 686                                        q->toplevel = borrowed->level;
 687                                        return;
 688                                }
 689                        } while ((borrowed=borrowed->borrow) != NULL);
 690                }
 691#if 0
 692        /* It is not necessary now. Uncommenting it
 693           will save CPU cycles, but decrease fairness.
 694         */
 695                q->toplevel = TC_CBQ_MAXLEVEL;
 696#endif
 697        }
 698}
 699
 700static void
 701cbq_update(struct cbq_sched_data *q)
 702{
 703        struct cbq_class *this = q->tx_class;
 704        struct cbq_class *cl = this;
 705        int len = q->tx_len;
 706
 707        q->tx_class = NULL;
 708
 709        for ( ; cl; cl = cl->share) {
 710                long avgidle = cl->avgidle;
 711                long idle;
 712
 713                cl->bstats.packets++;
 714                cl->bstats.bytes += len;
 715
 716                /*
 717                   (now - last) is the total time between packet right edges.
 718                   (last_pktlen/rate) is the "virtual" busy time, so that
 719
 720                         idle = (now - last) - last_pktlen/rate
 721                 */
 722
 723                idle = q->now - cl->last;
 724                if ((unsigned long)idle > 128*1024*1024) {
 725                        avgidle = cl->maxidle;
 726                } else {
 727                        idle -= L2T(cl, len);
 728
 729                /* true_avgidle := (1-W)*true_avgidle + W*idle,
 730                   where W=2^{-ewma_log}. But cl->avgidle is scaled:
 731                   cl->avgidle == true_avgidle/W,
 732                   hence:
 733                 */
 734                        avgidle += idle - (avgidle>>cl->ewma_log);
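
                /* For example, with ewma_log = 5 (W = 1/32) the scaled update
                   avgidle += idle - avgidle/32 is exactly
                   true_avgidle = (31/32)*true_avgidle + (1/32)*idle,
                   divided through by W.
                 */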
 735                }
 736
 737                if (avgidle <= 0) {
 738                        /* Overlimit or at-limit */
 739
 740                        if (avgidle < cl->minidle)
 741                                avgidle = cl->minidle;
 742
 743                        cl->avgidle = avgidle;
 744
 745                        /* Calculate the expected time when this class
 746                           will be allowed to send.
 747                           It will occur when:
 748                           (1-W)*true_avgidle + W*delay = 0, i.e.
 749                           idle = (1/W - 1)*(-true_avgidle)
 750                           or
 751                           idle = (1 - W)*(-cl->avgidle);
 752                         */
 753                        idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 754
 755                        /*
 756                           That is not all.
 757                           To maintain the rate allocated to the class,
 758                           we add to undertime the virtual clock time
 759                           needed to complete the transmitted packet.
 760                           (len/phys_bandwidth has already elapsed by
 761                           the time cbq_update runs.)
 762                         */
 763
 764                        idle -= L2T(&q->link, len);
 765                        idle += L2T(cl, len);
 766
 767                        cl->undertime = q->now + idle;
 768                } else {
 769                        /* Underlimit */
 770
 771                        cl->undertime = PSCHED_PASTPERFECT;
 772                        if (avgidle > cl->maxidle)
 773                                cl->avgidle = cl->maxidle;
 774                        else
 775                                cl->avgidle = avgidle;
 776                }
 777                cl->last = q->now;
 778        }
 779
 780        cbq_update_toplevel(q, this, q->tx_borrowed);
 781}
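
/* A minimal userspace sketch of the avgidle / undertime bookkeeping above
 * (illustrative only: names are made up, units are simplified, and the
 * link-rate correction and overflow guard of cbq_update() are omitted):
 *
 *	struct toy_class {
 *		long avgidle, maxidle, minidle;
 *		unsigned char ewma_log;
 *		long long undertime, last;
 *	};
 *
 *	static void toy_update(struct toy_class *c, long long now,
 *			       long pkt_time)		// len/rate, in ticks
 *	{
 *		long idle = (long)(now - c->last) - pkt_time;
 *		long avgidle = c->avgidle + idle - (c->avgidle >> c->ewma_log);
 *
 *		if (avgidle <= 0) {		// at or over the limit
 *			if (avgidle < c->minidle)
 *				avgidle = c->minidle;
 *			c->undertime = now + ((-avgidle) -
 *					      ((-avgidle) >> c->ewma_log));
 *		} else {			// underlimit, may send at once
 *			c->undertime = 0;
 *			if (avgidle > c->maxidle)
 *				avgidle = c->maxidle;
 *		}
 *		c->avgidle = avgidle;
 *		c->last = now;
 *	}
 */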
 782
 783static __inline__ struct cbq_class *
 784cbq_under_limit(struct cbq_class *cl)
 785{
 786        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 787        struct cbq_class *this_cl = cl;
 788
 789        if (cl->tparent == NULL)
 790                return cl;
 791
 792        if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 793                cl->delayed = 0;
 794                return cl;
 795        }
 796
 797        do {
 798                /* This is a very suspicious place. Currently the overlimit
 799                   action is generated for non-bounded classes
 800                   only if the link is completely congested.
 801                   Though this agrees with the ancestor-only paradigm,
 802                   it looks very stupid. In particular,
 803                   it means that this chunk of code will either
 804                   never be called or will result in strong amplification
 805                   of burstiness. Dangerous, silly, and yet
 806                   no other solution exists.
 807                 */
 808                if ((cl = cl->borrow) == NULL) {
 809                        this_cl->qstats.overlimits++;
 810                        this_cl->overlimit(this_cl);
 811                        return NULL;
 812                }
 813                if (cl->level > q->toplevel)
 814                        return NULL;
 815        } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 816
 817        cl->delayed = 0;
 818        return cl;
 819}
 820
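/* Weighted round-robin over the active classes of one priority band.
 * Each class carries a byte deficit: while it is positive the class may
 * transmit; once it drops to zero or below, the class's quantum is added
 * and the scheduler moves on, so quanta smaller than the MTU still give
 * the right long-run shares (see the WRR note at the top of this file).
 */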
 821static __inline__ struct sk_buff *
 822cbq_dequeue_prio(struct Qdisc *sch, int prio)
 823{
 824        struct cbq_sched_data *q = qdisc_priv(sch);
 825        struct cbq_class *cl_tail, *cl_prev, *cl;
 826        struct sk_buff *skb;
 827        int deficit;
 828
 829        cl_tail = cl_prev = q->active[prio];
 830        cl = cl_prev->next_alive;
 831
 832        do {
 833                deficit = 0;
 834
 835                /* Start round */
 836                do {
 837                        struct cbq_class *borrow = cl;
 838
 839                        if (cl->q->q.qlen &&
 840                            (borrow = cbq_under_limit(cl)) == NULL)
 841                                goto skip_class;
 842
 843                        if (cl->deficit <= 0) {
 844                                /* Class exhausted its allotment per
 845                                   this round. Switch to the next one.
 846                                 */
 847                                deficit = 1;
 848                                cl->deficit += cl->quantum;
 849                                goto next_class;
 850                        }
 851
 852                        skb = cl->q->dequeue(cl->q);
 853
 854                        /* Class did not give us any skb :-(
 855                           This can occur even if cl->q->q.qlen != 0,
 856                           e.g. if cl->q == "tbf"
 857                         */
 858                        if (skb == NULL)
 859                                goto skip_class;
 860
 861                        cl->deficit -= qdisc_pkt_len(skb);
 862                        q->tx_class = cl;
 863                        q->tx_borrowed = borrow;
 864                        if (borrow != cl) {
 865#ifndef CBQ_XSTATS_BORROWS_BYTES
 866                                borrow->xstats.borrows++;
 867                                cl->xstats.borrows++;
 868#else
 869                                borrow->xstats.borrows += qdisc_pkt_len(skb);
 870                                cl->xstats.borrows += qdisc_pkt_len(skb);
 871#endif
 872                        }
 873                        q->tx_len = qdisc_pkt_len(skb);
 874
 875                        if (cl->deficit <= 0) {
 876                                q->active[prio] = cl;
 877                                cl = cl->next_alive;
 878                                cl->deficit += cl->quantum;
 879                        }
 880                        return skb;
 881
 882skip_class:
 883                        if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 884                                /* Class is empty or penalized.
 885                                   Unlink it from active chain.
 886                                 */
 887                                cl_prev->next_alive = cl->next_alive;
 888                                cl->next_alive = NULL;
 889
 890                                /* Did cl_tail point to it? */
 891                                if (cl == cl_tail) {
 892                                        /* Repair it! */
 893                                        cl_tail = cl_prev;
 894
 895                                        /* Was it the last class in this band? */
 896                                        if (cl == cl_tail) {
 897                                                /* Kill the band! */
 898                                                q->active[prio] = NULL;
 899                                                q->activemask &= ~(1<<prio);
 900                                                if (cl->q->q.qlen)
 901                                                        cbq_activate_class(cl);
 902                                                return NULL;
 903                                        }
 904
 905                                        q->active[prio] = cl_tail;
 906                                }
 907                                if (cl->q->q.qlen)
 908                                        cbq_activate_class(cl);
 909
 910                                cl = cl_prev;
 911                        }
 912
 913next_class:
 914                        cl_prev = cl;
 915                        cl = cl->next_alive;
 916                } while (cl_prev != cl_tail);
 917        } while (deficit);
 918
 919        q->active[prio] = cl_prev;
 920
 921        return NULL;
 922}
 923
 924static __inline__ struct sk_buff *
 925cbq_dequeue_1(struct Qdisc *sch)
 926{
 927        struct cbq_sched_data *q = qdisc_priv(sch);
 928        struct sk_buff *skb;
 929        unsigned activemask;
 930
 931        activemask = q->activemask&0xFF;
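        /* ffz(~mask) is the index of the lowest set bit, so the bands are
         * served in strict priority order (0 = highest priority).
         */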
 932        while (activemask) {
 933                int prio = ffz(~activemask);
 934                activemask &= ~(1<<prio);
 935                skb = cbq_dequeue_prio(sch, prio);
 936                if (skb)
 937                        return skb;
 938        }
 939        return NULL;
 940}
 941
 942static struct sk_buff *
 943cbq_dequeue(struct Qdisc *sch)
 944{
 945        struct sk_buff *skb;
 946        struct cbq_sched_data *q = qdisc_priv(sch);
 947        psched_time_t now;
 948        psched_tdiff_t incr;
 949
 950        now = psched_get_time();
 951        incr = now - q->now_rt;
 952
 953        if (q->tx_class) {
 954                psched_tdiff_t incr2;
 955                /* Time integrator. We calculate EOS time
 956                   by adding expected packet transmission time.
 957                   If real time is greater, we warp artificial clock,
 958                   so that:
 959
 960                   cbq_time = max(real_time, work);
 961                 */
 962                incr2 = L2T(&q->link, q->tx_len);
 963                q->now += incr2;
 964                cbq_update(q);
 965                if ((incr -= incr2) < 0)
 966                        incr = 0;
 967        }
 968        q->now += incr;
 969        q->now_rt = now;
 970
 971        for (;;) {
 972                q->wd_expires = 0;
 973
 974                skb = cbq_dequeue_1(sch);
 975                if (skb) {
 976                        sch->q.qlen--;
 977                        sch->flags &= ~TCQ_F_THROTTLED;
 978                        return skb;
 979                }
 980
 981                /* All the classes are overlimit.
 982
 983                   This can happen if:
 984
 985                   1. The scheduler is empty.
 986                   2. The toplevel cutoff inhibited borrowing.
 987                   3. The root class is overlimit.
 988
 989                   Reset the 2nd and 3rd conditions and retry.
 990
 991                   Note that NS and cbq-2.0 are buggy here: picking
 992                   an arbitrary class is appropriate for ancestor-only
 993                   sharing, but not for the toplevel algorithm.
 994
 995                   Our version is better but slower, because it requires
 996                   two passes; this is unavoidable with top-level sharing.
 997                */
 998
 999                if (q->toplevel == TC_CBQ_MAXLEVEL &&
1000                    q->link.undertime == PSCHED_PASTPERFECT)
1001                        break;
1002
1003                q->toplevel = TC_CBQ_MAXLEVEL;
1004                q->link.undertime = PSCHED_PASTPERFECT;
1005        }
1006
1007        /* No packets in the scheduler, or nobody wants to give them to us :-(
1008           Sigh... start the watchdog timer in the latter case. */
1009
1010        if (sch->q.qlen) {
1011                sch->qstats.overlimits++;
1012                if (q->wd_expires)
1013                        qdisc_watchdog_schedule(&q->watchdog,
1014                                                now + q->wd_expires);
1015        }
1016        return NULL;
1017}
1018
1019/* CBQ class maintenance routines */
1020
1021static void cbq_adjust_levels(struct cbq_class *this)
1022{
1023        if (this == NULL)
1024                return;
1025
1026        do {
1027                int level = 0;
1028                struct cbq_class *cl;
1029
1030                if ((cl = this->children) != NULL) {
1031                        do {
1032                                if (cl->level > level)
1033                                        level = cl->level;
1034                        } while ((cl = cl->sibling) != this->children);
1035                }
1036                this->level = level+1;
1037        } while ((this = this->tparent) != NULL);
1038}
1039
1040static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1041{
1042        struct cbq_class *cl;
1043        struct hlist_node *n;
1044        unsigned int h;
1045
1046        if (q->quanta[prio] == 0)
1047                return;
1048
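        /* q->quanta[prio] is the sum of the weights of all classes in this
         * band, so each class gets its allot scaled by its weight relative
         * to the band's average weight:
         *   quantum = allot * weight / (quanta[prio] / nclasses[prio])
         */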
1049        for (h = 0; h < q->clhash.hashsize; h++) {
1050                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1051                        /* BUGGGG... Beware! This expression suffers from
1052                           arithmetic overflow!
1053                         */
1054                        if (cl->priority == prio) {
1055                                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1056                                        q->quanta[prio];
1057                        }
1058                        if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1059                                printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1060                                cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1061                        }
1062                }
1063        }
1064}
1065
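/* cl->defmap is a bitmap over TC_PRIO_* values for which this class wants
 * to be the default at its split node. cbq_sync_defmap() rebuilds
 * split->defaults[]: for each priority it picks, among the classes that
 * claim it, the one lowest in the hierarchy (smallest level).
 */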
1066static void cbq_sync_defmap(struct cbq_class *cl)
1067{
1068        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1069        struct cbq_class *split = cl->split;
1070        unsigned h;
1071        int i;
1072
1073        if (split == NULL)
1074                return;
1075
1076        for (i=0; i<=TC_PRIO_MAX; i++) {
1077                if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1078                        split->defaults[i] = NULL;
1079        }
1080
1081        for (i=0; i<=TC_PRIO_MAX; i++) {
1082                int level = split->level;
1083
1084                if (split->defaults[i])
1085                        continue;
1086
1087                for (h = 0; h < q->clhash.hashsize; h++) {
1088                        struct hlist_node *n;
1089                        struct cbq_class *c;
1090
1091                        hlist_for_each_entry(c, n, &q->clhash.hash[h],
1092                                             common.hnode) {
1093                                if (c->split == split && c->level < level &&
1094                                    c->defmap&(1<<i)) {
1095                                        split->defaults[i] = c;
1096                                        level = c->level;
1097                                }
1098                        }
1099                }
1100        }
1101}
1102
1103static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1104{
1105        struct cbq_class *split = NULL;
1106
1107        if (splitid == 0) {
1108                if ((split = cl->split) == NULL)
1109                        return;
1110                splitid = split->common.classid;
1111        }
1112
1113        if (split == NULL || split->common.classid != splitid) {
1114                for (split = cl->tparent; split; split = split->tparent)
1115                        if (split->common.classid == splitid)
1116                                break;
1117        }
1118
1119        if (split == NULL)
1120                return;
1121
1122        if (cl->split != split) {
1123                cl->defmap = 0;
1124                cbq_sync_defmap(cl);
1125                cl->split = split;
1126                cl->defmap = def&mask;
1127        } else
1128                cl->defmap = (cl->defmap&~mask)|(def&mask);
1129
1130        cbq_sync_defmap(cl);
1131}
1132
1133static void cbq_unlink_class(struct cbq_class *this)
1134{
1135        struct cbq_class *cl, **clp;
1136        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1137
1138        qdisc_class_hash_remove(&q->clhash, &this->common);
1139
1140        if (this->tparent) {
1141                clp=&this->sibling;
1142                cl = *clp;
1143                do {
1144                        if (cl == this) {
1145                                *clp = cl->sibling;
1146                                break;
1147                        }
1148                        clp = &cl->sibling;
1149                } while ((cl = *clp) != this->sibling);
1150
1151                if (this->tparent->children == this) {
1152                        this->tparent->children = this->sibling;
1153                        if (this->sibling == this)
1154                                this->tparent->children = NULL;
1155                }
1156        } else {
1157                WARN_ON(this->sibling != this);
1158        }
1159}
1160
1161static void cbq_link_class(struct cbq_class *this)
1162{
1163        struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1164        struct cbq_class *parent = this->tparent;
1165
1166        this->sibling = this;
1167        qdisc_class_hash_insert(&q->clhash, &this->common);
1168
1169        if (parent == NULL)
1170                return;
1171
1172        if (parent->children == NULL) {
1173                parent->children = this;
1174        } else {
1175                this->sibling = parent->children->sibling;
1176                parent->children->sibling = this;
1177        }
1178}
1179
1180static unsigned int cbq_drop(struct Qdisc* sch)
1181{
1182        struct cbq_sched_data *q = qdisc_priv(sch);
1183        struct cbq_class *cl, *cl_head;
1184        int prio;
1185        unsigned int len;
1186
1187        for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1188                if ((cl_head = q->active[prio]) == NULL)
1189                        continue;
1190
1191                cl = cl_head;
1192                do {
1193                        if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1194                                sch->q.qlen--;
1195                                if (!cl->q->q.qlen)
1196                                        cbq_deactivate_class(cl);
1197                                return len;
1198                        }
1199                } while ((cl = cl->next_alive) != cl_head);
1200        }
1201        return 0;
1202}
1203
1204static void
1205cbq_reset(struct Qdisc* sch)
1206{
1207        struct cbq_sched_data *q = qdisc_priv(sch);
1208        struct cbq_class *cl;
1209        struct hlist_node *n;
1210        int prio;
1211        unsigned h;
1212
1213        q->activemask = 0;
1214        q->pmask = 0;
1215        q->tx_class = NULL;
1216        q->tx_borrowed = NULL;
1217        qdisc_watchdog_cancel(&q->watchdog);
1218        hrtimer_cancel(&q->delay_timer);
1219        q->toplevel = TC_CBQ_MAXLEVEL;
1220        q->now = psched_get_time();
1221        q->now_rt = q->now;
1222
1223        for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1224                q->active[prio] = NULL;
1225
1226        for (h = 0; h < q->clhash.hashsize; h++) {
1227                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1228                        qdisc_reset(cl->q);
1229
1230                        cl->next_alive = NULL;
1231                        cl->undertime = PSCHED_PASTPERFECT;
1232                        cl->avgidle = cl->maxidle;
1233                        cl->deficit = cl->quantum;
1234                        cl->cpriority = cl->priority;
1235                }
1236        }
1237        sch->q.qlen = 0;
1238}
1239
1240
1241static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1242{
1243        if (lss->change&TCF_CBQ_LSS_FLAGS) {
1244                cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1245                cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1246        }
1247        if (lss->change&TCF_CBQ_LSS_EWMA)
1248                cl->ewma_log = lss->ewma_log;
1249        if (lss->change&TCF_CBQ_LSS_AVPKT)
1250                cl->avpkt = lss->avpkt;
1251        if (lss->change&TCF_CBQ_LSS_MINIDLE)
1252                cl->minidle = -(long)lss->minidle;
1253        if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1254                cl->maxidle = lss->maxidle;
1255                cl->avgidle = lss->maxidle;
1256        }
1257        if (lss->change&TCF_CBQ_LSS_OFFTIME)
1258                cl->offtime = lss->offtime;
1259        return 0;
1260}
1261
1262static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1263{
1264        q->nclasses[cl->priority]--;
1265        q->quanta[cl->priority] -= cl->weight;
1266        cbq_normalize_quanta(q, cl->priority);
1267}
1268
1269static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1270{
1271        q->nclasses[cl->priority]++;
1272        q->quanta[cl->priority] += cl->weight;
1273        cbq_normalize_quanta(q, cl->priority);
1274}
1275
1276static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1277{
1278        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1279
1280        if (wrr->allot)
1281                cl->allot = wrr->allot;
1282        if (wrr->weight)
1283                cl->weight = wrr->weight;
1284        if (wrr->priority) {
1285                cl->priority = wrr->priority-1;
1286                cl->cpriority = cl->priority;
1287                if (cl->priority >= cl->priority2)
1288                        cl->priority2 = TC_CBQ_MAXPRIO-1;
1289        }
1290
1291        cbq_addprio(q, cl);
1292        return 0;
1293}
1294
1295static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1296{
1297        switch (ovl->strategy) {
1298        case TC_CBQ_OVL_CLASSIC:
1299                cl->overlimit = cbq_ovl_classic;
1300                break;
1301        case TC_CBQ_OVL_DELAY:
1302                cl->overlimit = cbq_ovl_delay;
1303                break;
1304        case TC_CBQ_OVL_LOWPRIO:
1305                if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1306                    ovl->priority2-1 <= cl->priority)
1307                        return -EINVAL;
1308                cl->priority2 = ovl->priority2-1;
1309                cl->overlimit = cbq_ovl_lowprio;
1310                break;
1311        case TC_CBQ_OVL_DROP:
1312                cl->overlimit = cbq_ovl_drop;
1313                break;
1314        case TC_CBQ_OVL_RCLASSIC:
1315                cl->overlimit = cbq_ovl_rclassic;
1316                break;
1317        default:
1318                return -EINVAL;
1319        }
1320        cl->penalty = ovl->penalty;
1321        return 0;
1322}
1323
1324#ifdef CONFIG_NET_CLS_ACT
1325static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1326{
1327        cl->police = p->police;
1328
1329        if (cl->q->handle) {
1330                if (p->police == TC_POLICE_RECLASSIFY)
1331                        cl->q->reshape_fail = cbq_reshape_fail;
1332                else
1333                        cl->q->reshape_fail = NULL;
1334        }
1335        return 0;
1336}
1337#endif
1338
1339static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1340{
1341        cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1342        return 0;
1343}
1344
1345static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1346        [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1347        [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1348        [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1349        [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1350        [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1351        [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1352        [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1353};
1354
1355static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1356{
1357        struct cbq_sched_data *q = qdisc_priv(sch);
1358        struct nlattr *tb[TCA_CBQ_MAX + 1];
1359        struct tc_ratespec *r;
1360        int err;
1361
1362        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1363        if (err < 0)
1364                return err;
1365
1366        if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1367                return -EINVAL;
1368
1369        r = nla_data(tb[TCA_CBQ_RATE]);
1370
1371        if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1372                return -EINVAL;
1373
1374        err = qdisc_class_hash_init(&q->clhash);
1375        if (err < 0)
1376                goto put_rtab;
1377
1378        q->link.refcnt = 1;
1379        q->link.sibling = &q->link;
1380        q->link.common.classid = sch->handle;
1381        q->link.qdisc = sch;
1382        if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1383                                            &pfifo_qdisc_ops,
1384                                            sch->handle)))
1385                q->link.q = &noop_qdisc;
1386
1387        q->link.priority = TC_CBQ_MAXPRIO-1;
1388        q->link.priority2 = TC_CBQ_MAXPRIO-1;
1389        q->link.cpriority = TC_CBQ_MAXPRIO-1;
1390        q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1391        q->link.overlimit = cbq_ovl_classic;
1392        q->link.allot = psched_mtu(qdisc_dev(sch));
1393        q->link.quantum = q->link.allot;
1394        q->link.weight = q->link.R_tab->rate.rate;
1395
1396        q->link.ewma_log = TC_CBQ_DEF_EWMA;
1397        q->link.avpkt = q->link.allot/2;
1398        q->link.minidle = -0x7FFFFFFF;
1399
1400        qdisc_watchdog_init(&q->watchdog, sch);
1401        hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1402        q->delay_timer.function = cbq_undelay;
1403        q->toplevel = TC_CBQ_MAXLEVEL;
1404        q->now = psched_get_time();
1405        q->now_rt = q->now;
1406
1407        cbq_link_class(&q->link);
1408
1409        if (tb[TCA_CBQ_LSSOPT])
1410                cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1411
1412        cbq_addprio(q, &q->link);
1413        return 0;
1414
1415put_rtab:
1416        qdisc_put_rtab(q->link.R_tab);
1417        return err;
1418}
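
/* Userspace configures all of this through tc(8); a rough, illustrative
 * example (parameter values are arbitrary):
 *
 *	tc qdisc add dev eth0 root handle 1: cbq \
 *		bandwidth 10Mbit avpkt 1000
 *	tc class add dev eth0 parent 1: classid 1:1 cbq \
 *		bandwidth 10Mbit rate 1Mbit allot 1514 prio 5 \
 *		avpkt 1000 bounded
 *
 * tc computes the rate table (TCA_CBQ_RTAB) and the LSS/WRR/overlimit
 * options from these parameters and passes them in the netlink attributes
 * parsed above.
 */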
1419
1420static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1421{
1422        unsigned char *b = skb_tail_pointer(skb);
1423
1424        NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1425        return skb->len;
1426
1427nla_put_failure:
1428        nlmsg_trim(skb, b);
1429        return -1;
1430}
1431
1432static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1433{
1434        unsigned char *b = skb_tail_pointer(skb);
1435        struct tc_cbq_lssopt opt;
1436
1437        opt.flags = 0;
1438        if (cl->borrow == NULL)
1439                opt.flags |= TCF_CBQ_LSS_BOUNDED;
1440        if (cl->share == NULL)
1441                opt.flags |= TCF_CBQ_LSS_ISOLATED;
1442        opt.ewma_log = cl->ewma_log;
1443        opt.level = cl->level;
1444        opt.avpkt = cl->avpkt;
1445        opt.maxidle = cl->maxidle;
1446        opt.minidle = (u32)(-cl->minidle);
1447        opt.offtime = cl->offtime;
1448        opt.change = ~0;
1449        NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1450        return skb->len;
1451
1452nla_put_failure:
1453        nlmsg_trim(skb, b);
1454        return -1;
1455}
1456
1457static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1458{
1459        unsigned char *b = skb_tail_pointer(skb);
1460        struct tc_cbq_wrropt opt;
1461
1462        opt.flags = 0;
1463        opt.allot = cl->allot;
1464        opt.priority = cl->priority+1;
1465        opt.cpriority = cl->cpriority+1;
1466        opt.weight = cl->weight;
1467        NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1468        return skb->len;
1469
1470nla_put_failure:
1471        nlmsg_trim(skb, b);
1472        return -1;
1473}
1474
1475static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1476{
1477        unsigned char *b = skb_tail_pointer(skb);
1478        struct tc_cbq_ovl opt;
1479
1480        opt.strategy = cl->ovl_strategy;
1481        opt.priority2 = cl->priority2+1;
1482        opt.pad = 0;
1483        opt.penalty = cl->penalty;
1484        NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1485        return skb->len;
1486
1487nla_put_failure:
1488        nlmsg_trim(skb, b);
1489        return -1;
1490}
1491
1492static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1493{
1494        unsigned char *b = skb_tail_pointer(skb);
1495        struct tc_cbq_fopt opt;
1496
1497        if (cl->split || cl->defmap) {
1498                opt.split = cl->split ? cl->split->common.classid : 0;
1499                opt.defmap = cl->defmap;
1500                opt.defchange = ~0;
1501                NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1502        }
1503        return skb->len;
1504
1505nla_put_failure:
1506        nlmsg_trim(skb, b);
1507        return -1;
1508}
1509
1510#ifdef CONFIG_NET_CLS_ACT
1511static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1512{
1513        unsigned char *b = skb_tail_pointer(skb);
1514        struct tc_cbq_police opt;
1515
1516        if (cl->police) {
1517                opt.police = cl->police;
1518                opt.__res1 = 0;
1519                opt.__res2 = 0;
1520                NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1521        }
1522        return skb->len;
1523
1524nla_put_failure:
1525        nlmsg_trim(skb, b);
1526        return -1;
1527}
1528#endif
1529
1530static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1531{
1532        if (cbq_dump_lss(skb, cl) < 0 ||
1533            cbq_dump_rate(skb, cl) < 0 ||
1534            cbq_dump_wrr(skb, cl) < 0 ||
1535            cbq_dump_ovl(skb, cl) < 0 ||
1536#ifdef CONFIG_NET_CLS_ACT
1537            cbq_dump_police(skb, cl) < 0 ||
1538#endif
1539            cbq_dump_fopt(skb, cl) < 0)
1540                return -1;
1541        return 0;
1542}
1543
1544static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1545{
1546        struct cbq_sched_data *q = qdisc_priv(sch);
1547        struct nlattr *nest;
1548
1549        nest = nla_nest_start(skb, TCA_OPTIONS);
1550        if (nest == NULL)
1551                goto nla_put_failure;
1552        if (cbq_dump_attr(skb, &q->link) < 0)
1553                goto nla_put_failure;
1554        nla_nest_end(skb, nest);
1555        return skb->len;
1556
1557nla_put_failure:
1558        nla_nest_cancel(skb, nest);
1559        return -1;
1560}
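/*
 * The resulting dump, both for the qdisc here and per class via
 * cbq_dump_class() below, is a TCA_OPTIONS nest whose members appear in
 * the order emitted by cbq_dump_attr(), roughly:
 *
 *	TCA_OPTIONS
 *	  TCA_CBQ_LSSOPT	(struct tc_cbq_lssopt)
 *	  TCA_CBQ_RATE		(struct tc_ratespec)
 *	  TCA_CBQ_WRROPT	(struct tc_cbq_wrropt)
 *	  TCA_CBQ_OVL_STRATEGY	(struct tc_cbq_ovl)
 *	  [TCA_CBQ_POLICE]	(CONFIG_NET_CLS_ACT and cl->police set)
 *	  [TCA_CBQ_FOPT]	(only when a split or defmap is configured)
 *
 * which is what "tc qdisc show" and "tc class show" decode.
 */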
1561
1562static int
1563cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1564{
1565        struct cbq_sched_data *q = qdisc_priv(sch);
1566
1567        q->link.xstats.avgidle = q->link.avgidle;
1568        return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1569}
1570
1571static int
1572cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1573               struct sk_buff *skb, struct tcmsg *tcm)
1574{
1575        struct cbq_class *cl = (struct cbq_class*)arg;
1576        struct nlattr *nest;
1577
1578        if (cl->tparent)
1579                tcm->tcm_parent = cl->tparent->common.classid;
1580        else
1581                tcm->tcm_parent = TC_H_ROOT;
1582        tcm->tcm_handle = cl->common.classid;
1583        tcm->tcm_info = cl->q->handle;
1584
1585        nest = nla_nest_start(skb, TCA_OPTIONS);
1586        if (nest == NULL)
1587                goto nla_put_failure;
1588        if (cbq_dump_attr(skb, cl) < 0)
1589                goto nla_put_failure;
1590        nla_nest_end(skb, nest);
1591        return skb->len;
1592
1593nla_put_failure:
1594        nla_nest_cancel(skb, nest);
1595        return -1;
1596}
1597
1598static int
1599cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1600        struct gnet_dump *d)
1601{
1602        struct cbq_sched_data *q = qdisc_priv(sch);
1603        struct cbq_class *cl = (struct cbq_class*)arg;
1604
1605        cl->qstats.qlen = cl->q->q.qlen;
1606        cl->xstats.avgidle = cl->avgidle;
1607        cl->xstats.undertime = 0;
1608
1609        if (cl->undertime != PSCHED_PASTPERFECT)
1610                cl->xstats.undertime = cl->undertime - q->now;
1611
1612        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1613            gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1614            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1615                return -1;
1616
1617        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1618}
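/*
 * Per-class statistics combine the generic byte/packet/queue counters,
 * copied via the gnet_stats_copy_*() helpers, with CBQ-specific extended
 * stats in cl->xstats; avgidle and undertime are sampled here relative
 * to q->now so that a stats dump (typically "tc -s class show dev ...")
 * sees a consistent snapshot.
 */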
1619
1620static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1621                     struct Qdisc **old)
1622{
1623        struct cbq_class *cl = (struct cbq_class*)arg;
1624
1625        if (new == NULL) {
1626                new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1627                                        &pfifo_qdisc_ops, cl->common.classid);
1628                if (new == NULL)
1629                        return -ENOBUFS;
1630        } else {
1631#ifdef CONFIG_NET_CLS_ACT
1632                if (cl->police == TC_POLICE_RECLASSIFY)
1633                        new->reshape_fail = cbq_reshape_fail;
1634#endif
1635        }
1636        sch_tree_lock(sch);
1637        *old = cl->q;
1638        cl->q = new;
1639        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1640        qdisc_reset(*old);
1641        sch_tree_unlock(sch);
1642
1643        return 0;
1644}
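/*
 * ->graft() swaps a class's leaf qdisc under the tree lock; when the
 * caller supplies no replacement a default pfifo is created, and with
 * the TC_POLICE_RECLASSIFY policer the new child is given the
 * reshape_fail hook so over-limit packets can be handed back to CBQ for
 * reclassification.  Illustrative usage (syntax may vary): attaching
 * e.g. SFQ under a CBQ class ends up here via something like
 *
 *	tc qdisc add dev eth0 parent 1:10 handle 10: sfq perturb 10
 */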
1645
1646static struct Qdisc *
1647cbq_leaf(struct Qdisc *sch, unsigned long arg)
1648{
1649        struct cbq_class *cl = (struct cbq_class*)arg;
1650
1651        return cl->q;
1652}
1653
1654static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1655{
1656        struct cbq_class *cl = (struct cbq_class *)arg;
1657
1658        if (cl->q->q.qlen == 0)
1659                cbq_deactivate_class(cl);
1660}
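/*
 * qlen_notify is called, e.g. from qdisc_tree_decrease_qlen(), when
 * packets disappear from a child qdisc behind CBQ's back; if the leaf
 * is now empty the class is taken off its priority band's active
 * (next_alive) ring so the WRR scheduler stops visiting it.
 */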
1661
1662static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1663{
1664        struct cbq_sched_data *q = qdisc_priv(sch);
1665        struct cbq_class *cl = cbq_class_lookup(q, classid);
1666
1667        if (cl) {
1668                cl->refcnt++;
1669                return (unsigned long)cl;
1670        }
1671        return 0;
1672}
1673
1674static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1675{
1676        struct cbq_sched_data *q = qdisc_priv(sch);
1677
1678        WARN_ON(cl->filters);
1679
1680        tcf_destroy_chain(&cl->filter_list);
1681        qdisc_destroy(cl->q);
1682        qdisc_put_rtab(cl->R_tab);
1683        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1684        if (cl != &q->link)
1685                kfree(cl);
1686}
1687
1688static void
1689cbq_destroy(struct Qdisc* sch)
1690{
1691        struct cbq_sched_data *q = qdisc_priv(sch);
1692        struct hlist_node *n, *next;
1693        struct cbq_class *cl;
1694        unsigned h;
1695
1696#ifdef CONFIG_NET_CLS_ACT
1697        q->rx_class = NULL;
1698#endif
1699        /*
1700         * Filters must be destroyed first because we don't destroy the
1701         * classes from root to leaves, which means that filters can still
1702         * be bound to classes which have been destroyed already. --TGR '04
1703         */
1704        for (h = 0; h < q->clhash.hashsize; h++) {
1705                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1706                        tcf_destroy_chain(&cl->filter_list);
1707        }
1708        for (h = 0; h < q->clhash.hashsize; h++) {
1709                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1710                                          common.hnode)
1711                        cbq_destroy_class(sch, cl);
1712        }
1713        qdisc_class_hash_destroy(&q->clhash);
1714}
1715
1716static void cbq_put(struct Qdisc *sch, unsigned long arg)
1717{
1718        struct cbq_class *cl = (struct cbq_class*)arg;
1719
1720        if (--cl->refcnt == 0) {
1721#ifdef CONFIG_NET_CLS_ACT
1722                spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1723                struct cbq_sched_data *q = qdisc_priv(sch);
1724
1725                spin_lock_bh(root_lock);
1726                if (q->rx_class == cl)
1727                        q->rx_class = NULL;
1728                spin_unlock_bh(root_lock);
1729#endif
1730
1731                cbq_destroy_class(sch, cl);
1732        }
1733}
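/*
 * Class lifetime is reference counted: cbq_get() is the cops->get()
 * hook and takes a reference whenever the core looks a class up for a
 * configuration operation; cbq_put() drops it and, once the count
 * reaches zero, clears a possibly dangling rx_class shortcut under the
 * root lock before the class is finally destroyed.
 */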
1734
1735static int
1736cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1737                 unsigned long *arg)
1738{
1739        int err;
1740        struct cbq_sched_data *q = qdisc_priv(sch);
1741        struct cbq_class *cl = (struct cbq_class*)*arg;
1742        struct nlattr *opt = tca[TCA_OPTIONS];
1743        struct nlattr *tb[TCA_CBQ_MAX + 1];
1744        struct cbq_class *parent;
1745        struct qdisc_rate_table *rtab = NULL;
1746
1747        if (opt == NULL)
1748                return -EINVAL;
1749
1750        err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1751        if (err < 0)
1752                return err;
1753
1754        if (cl) {
1755                /* Check parent */
1756                if (parentid) {
1757                        if (cl->tparent &&
1758                            cl->tparent->common.classid != parentid)
1759                                return -EINVAL;
1760                        if (!cl->tparent && parentid != TC_H_ROOT)
1761                                return -EINVAL;
1762                }
1763
1764                if (tb[TCA_CBQ_RATE]) {
1765                        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1766                                              tb[TCA_CBQ_RTAB]);
1767                        if (rtab == NULL)
1768                                return -EINVAL;
1769                }
1770
1771                if (tca[TCA_RATE]) {
1772                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1773                                                    qdisc_root_sleeping_lock(sch),
1774                                                    tca[TCA_RATE]);
1775                        if (err) {
1776                                if (rtab)
1777                                        qdisc_put_rtab(rtab);
1778                                return err;
1779                        }
1780                }
1781
1782                /* Change class parameters */
1783                sch_tree_lock(sch);
1784
1785                if (cl->next_alive != NULL)
1786                        cbq_deactivate_class(cl);
1787
1788                if (rtab) {
1789                        qdisc_put_rtab(cl->R_tab);
1790                        cl->R_tab = rtab;
1791                }
1792
1793                if (tb[TCA_CBQ_LSSOPT])
1794                        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1795
1796                if (tb[TCA_CBQ_WRROPT]) {
1797                        cbq_rmprio(q, cl);
1798                        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1799                }
1800
1801                if (tb[TCA_CBQ_OVL_STRATEGY])
1802                        cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1803
1804#ifdef CONFIG_NET_CLS_ACT
1805                if (tb[TCA_CBQ_POLICE])
1806                        cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1807#endif
1808
1809                if (tb[TCA_CBQ_FOPT])
1810                        cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1811
1812                if (cl->q->q.qlen)
1813                        cbq_activate_class(cl);
1814
1815                sch_tree_unlock(sch);
1816
1817                return 0;
1818        }
1819
1820        if (parentid == TC_H_ROOT)
1821                return -EINVAL;
1822
1823        if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1824            tb[TCA_CBQ_LSSOPT] == NULL)
1825                return -EINVAL;
1826
1827        rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1828        if (rtab == NULL)
1829                return -EINVAL;
1830
1831        if (classid) {
1832                err = -EINVAL;
1833                if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1834                        goto failure;
1835        } else {
1836                int i;
1837                classid = TC_H_MAKE(sch->handle,0x8000);
1838
1839                for (i=0; i<0x8000; i++) {
1840                        if (++q->hgenerator >= 0x8000)
1841                                q->hgenerator = 1;
1842                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1843                                break;
1844                }
1845                err = -ENOSR;
1846                if (i >= 0x8000)
1847                        goto failure;
1848                classid = classid|q->hgenerator;
1849        }
1850
1851        parent = &q->link;
1852        if (parentid) {
1853                parent = cbq_class_lookup(q, parentid);
1854                err = -EINVAL;
1855                if (parent == NULL)
1856                        goto failure;
1857        }
1858
1859        err = -ENOBUFS;
1860        cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1861        if (cl == NULL)
1862                goto failure;
1863
1864        if (tca[TCA_RATE]) {
1865                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1866                                        qdisc_root_sleeping_lock(sch),
1867                                        tca[TCA_RATE]);
1868                if (err) {
1869                        kfree(cl);
1870                        goto failure;
1871                }
1872        }
1873
1874        cl->R_tab = rtab;
1875        rtab = NULL;
1876        cl->refcnt = 1;
1877        if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1878                                        &pfifo_qdisc_ops, classid)))
1879                cl->q = &noop_qdisc;
1880        cl->common.classid = classid;
1881        cl->tparent = parent;
1882        cl->qdisc = sch;
1883        cl->allot = parent->allot;
1884        cl->quantum = cl->allot;
1885        cl->weight = cl->R_tab->rate.rate;
1886
1887        sch_tree_lock(sch);
1888        cbq_link_class(cl);
1889        cl->borrow = cl->tparent;
1890        if (cl->tparent != &q->link)
1891                cl->share = cl->tparent;
1892        cbq_adjust_levels(parent);
1893        cl->minidle = -0x7FFFFFFF;
1894        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1895        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1896        if (cl->ewma_log==0)
1897                cl->ewma_log = q->link.ewma_log;
1898        if (cl->maxidle==0)
1899                cl->maxidle = q->link.maxidle;
1900        if (cl->avpkt==0)
1901                cl->avpkt = q->link.avpkt;
1902        cl->overlimit = cbq_ovl_classic;
1903        if (tb[TCA_CBQ_OVL_STRATEGY])
1904                cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1905#ifdef CONFIG_NET_CLS_ACT
1906        if (tb[TCA_CBQ_POLICE])
1907                cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1908#endif
1909        if (tb[TCA_CBQ_FOPT])
1910                cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1911        sch_tree_unlock(sch);
1912
1913        qdisc_class_hash_grow(sch, &q->clhash);
1914
1915        *arg = (unsigned long)cl;
1916        return 0;
1917
1918failure:
1919        qdisc_put_rtab(rtab);
1920        return err;
1921}
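/*
 * cbq_change_class() serves both "tc class change" (the cl != NULL
 * branch above) and "tc class add".  For a new class the RATE, WRR and
 * LSS options are mandatory; when no classid is supplied the minor id
 * is taken from the 0x8000 | hgenerator space, so auto-assigned classes
 * get minors in the 0x8001..0xffff range, and unset ewma_log, maxidle
 * and avpkt values are inherited from the root link class.
 *
 * Illustrative only, since tc computes several of these values itself:
 *
 *	tc class add dev eth0 parent 1: classid 1:10 cbq \
 *		bandwidth 10Mbit rate 1Mbit allot 1514 prio 5 \
 *		avpkt 1000 maxburst 20 bounded
 *
 * roughly maps "rate" to TCA_CBQ_RATE (+ TCA_CBQ_RTAB), allot/prio/
 * weight to TCA_CBQ_WRROPT, and avpkt/maxidle plus the bounded and
 * isolated flags to TCA_CBQ_LSSOPT.
 */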
1922
1923static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1924{
1925        struct cbq_sched_data *q = qdisc_priv(sch);
1926        struct cbq_class *cl = (struct cbq_class*)arg;
1927        unsigned int qlen;
1928
1929        if (cl->filters || cl->children || cl == &q->link)
1930                return -EBUSY;
1931
1932        sch_tree_lock(sch);
1933
1934        qlen = cl->q->q.qlen;
1935        qdisc_reset(cl->q);
1936        qdisc_tree_decrease_qlen(cl->q, qlen);
1937
1938        if (cl->next_alive)
1939                cbq_deactivate_class(cl);
1940
1941        if (q->tx_borrowed == cl)
1942                q->tx_borrowed = q->tx_class;
1943        if (q->tx_class == cl) {
1944                q->tx_class = NULL;
1945                q->tx_borrowed = NULL;
1946        }
1947#ifdef CONFIG_NET_CLS_ACT
1948        if (q->rx_class == cl)
1949                q->rx_class = NULL;
1950#endif
1951
1952        cbq_unlink_class(cl);
1953        cbq_adjust_levels(cl->tparent);
1954        cl->defmap = 0;
1955        cbq_sync_defmap(cl);
1956
1957        cbq_rmprio(q, cl);
1958        sch_tree_unlock(sch);
1959
1960        BUG_ON(--cl->refcnt == 0);
1961        /*
1962         * This shouldn't happen: we "hold" one cops->get() when called
1963         * from tc_ctl_tclass; the destroy method is done from cops->put().
1964         */
1965
1966        return 0;
1967}
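/*
 * Deletion ("tc class del dev eth0 classid 1:10", for example) is
 * refused while filters are still bound to the class, while it still
 * has children, or for the built-in link class.  Otherwise the leaf
 * queue is flushed, the class is removed from the WRR/active structures
 * and from any split defmaps, and the tx/rx shortcuts that may still
 * point at it are cleared; the final reference is then dropped through
 * cops->put() as the comment above notes.
 */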
1968
1969static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1970{
1971        struct cbq_sched_data *q = qdisc_priv(sch);
1972        struct cbq_class *cl = (struct cbq_class *)arg;
1973
1974        if (cl == NULL)
1975                cl = &q->link;
1976
1977        return &cl->filter_list;
1978}
1979
1980static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1981                                     u32 classid)
1982{
1983        struct cbq_sched_data *q = qdisc_priv(sch);
1984        struct cbq_class *p = (struct cbq_class*)parent;
1985        struct cbq_class *cl = cbq_class_lookup(q, classid);
1986
1987        if (cl) {
1988                if (p && p->level <= cl->level)
1989                        return 0;
1990                cl->filters++;
1991                return (unsigned long)cl;
1992        }
1993        return 0;
1994}
1995
1996static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1997{
1998        struct cbq_class *cl = (struct cbq_class*)arg;
1999
2000        cl->filters--;
2001}
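/*
 * bind_tcf/unbind_tcf maintain cl->filters, the number of tc filters
 * whose target is this class.  A filter attached at class 'parent' may
 * only point to classes at a strictly lower level, i.e. deeper in the
 * tree, and a non-zero filter count is what makes cbq_delete() above
 * return -EBUSY (cbq_destroy_class() also warns if a class is freed
 * with filters still attached).
 */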
2002
2003static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2004{
2005        struct cbq_sched_data *q = qdisc_priv(sch);
2006        struct cbq_class *cl;
2007        struct hlist_node *n;
2008        unsigned h;
2009
2010        if (arg->stop)
2011                return;
2012
2013        for (h = 0; h < q->clhash.hashsize; h++) {
2014                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2015                        if (arg->count < arg->skip) {
2016                                arg->count++;
2017                                continue;
2018                        }
2019                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2020                                arg->stop = 1;
2021                                return;
2022                        }
2023                        arg->count++;
2024                }
2025        }
2026}
2027
2028static const struct Qdisc_class_ops cbq_class_ops = {
2029        .graft          =       cbq_graft,
2030        .leaf           =       cbq_leaf,
2031        .qlen_notify    =       cbq_qlen_notify,
2032        .get            =       cbq_get,
2033        .put            =       cbq_put,
2034        .change         =       cbq_change_class,
2035        .delete         =       cbq_delete,
2036        .walk           =       cbq_walk,
2037        .tcf_chain      =       cbq_find_tcf,
2038        .bind_tcf       =       cbq_bind_filter,
2039        .unbind_tcf     =       cbq_unbind_filter,
2040        .dump           =       cbq_dump_class,
2041        .dump_stats     =       cbq_dump_class_stats,
2042};
2043
2044static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2045        .next           =       NULL,
2046        .cl_ops         =       &cbq_class_ops,
2047        .id             =       "cbq",
2048        .priv_size      =       sizeof(struct cbq_sched_data),
2049        .enqueue        =       cbq_enqueue,
2050        .dequeue        =       cbq_dequeue,
2051        .peek           =       qdisc_peek_dequeued,
2052        .drop           =       cbq_drop,
2053        .init           =       cbq_init,
2054        .reset          =       cbq_reset,
2055        .destroy        =       cbq_destroy,
2056        .change         =       NULL,
2057        .dump           =       cbq_dump,
2058        .dump_stats     =       cbq_dump_stats,
2059        .owner          =       THIS_MODULE,
2060};
2061
2062static int __init cbq_module_init(void)
2063{
2064        return register_qdisc(&cbq_qdisc_ops);
2065}
2066static void __exit cbq_module_exit(void)
2067{
2068        unregister_qdisc(&cbq_qdisc_ops);
2069}
2070module_init(cbq_module_init)
2071module_exit(cbq_module_exit)
2072MODULE_LICENSE("GPL");
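/*
 * Module boilerplate: register_qdisc() makes the "cbq" id known to the
 * packet scheduler core, so a request such as "tc qdisc add ... cbq"
 * can load this module on demand and then instantiate the qdisc through
 * cbq_qdisc_ops.
 */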
2073