linux/net/sched/sch_htb.c
   1/*
   2 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Martin Devera, <devik@cdi.cz>
  10 *
  11 * Credits (in time order) for older HTB versions:
  12 *              Stef Coene <stef.coene@docum.org>
  13 *                      HTB support at LARTC mailing list
  14 *              Ondrej Kraus, <krauso@barr.cz>
  15 *                      found missing INIT_QDISC(htb)
  16 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  17 *                      helped a lot to locate nasty class stall bug
  18 *              Andi Kleen, Jamal Hadi, Bert Hubert
  19 *                      code review and helpful comments on shaping
  20 *              Tomasz Wrona, <tw@eter.tym.pl>
  21 *                      created test case so that I was able to fix nasty bug
  22 *              Wilfried Weissmann
  23 *                      spotted bug in dequeue code and helped with fix
  24 *              Jiri Fojtasek
  25 *                      fixed requeue routine
  26 *              and many others. thanks.
  27 */
  28#include <linux/module.h>
  29#include <linux/moduleparam.h>
  30#include <linux/types.h>
  31#include <linux/kernel.h>
  32#include <linux/string.h>
  33#include <linux/errno.h>
  34#include <linux/skbuff.h>
  35#include <linux/list.h>
  36#include <linux/compiler.h>
  37#include <linux/rbtree.h>
  38#include <net/netlink.h>
  39#include <net/pkt_sched.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
   45    it allows assigning a priority to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
   49    Each class is assigned a level. A leaf ALWAYS has level 0 and root
   50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
   51    one less than their parent.
  52*/
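
/* Editorial illustration of the level scheme above (not from the original
   author): with the usual TC_HTB_MAXDEPTH of 8, a root class sits at level 7,
   an inner class directly below it at level 6, and every leaf at level 0,
   regardless of how deep the configured tree actually is. */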
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
   55#define HTB_VER 0x30011         /* major must be matched with number supplied by TC as version */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
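/* Editorial note (assumption based on standard module_param behaviour): with
 * the 0640 permissions above, the knob is normally exposed at runtime as
 * /sys/module/sch_htb/parameters/htb_hysteresis, so it can be toggled without
 * reloading the module. */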
  64
   65/* used internally to keep the status of a single class */
  66enum htb_cmode {
  67        HTB_CANT_SEND,          /* class can't send and can't borrow */
  68        HTB_MAY_BORROW,         /* class can't send but may borrow */
  69        HTB_CAN_SEND            /* class can send */
  70};
  71
  72/* interior & leaf nodes; props specific to leaves are marked L: */
  73struct htb_class {
  74        struct Qdisc_class_common common;
  75        /* general class parameters */
  76        struct gnet_stats_basic bstats;
  77        struct gnet_stats_queue qstats;
  78        struct gnet_stats_rate_est rate_est;
  79        struct tc_htb_xstats xstats;    /* our special stats */
  80        int refcnt;             /* usage count of this class */
  81
  82        /* topology */
  83        int level;              /* our level (see above) */
  84        unsigned int children;
  85        struct htb_class *parent;       /* parent class */
  86
  87        int prio;               /* these two are used only by leaves... */
  88        int quantum;            /* but stored for parent-to-leaf return */
  89
  90        union {
  91                struct htb_class_leaf {
  92                        struct Qdisc *q;
  93                        int deficit[TC_HTB_MAXDEPTH];
  94                        struct list_head drop_list;
  95                } leaf;
  96                struct htb_class_inner {
  97                        struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
  98                        struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
  99                        /* When class changes from state 1->2 and disconnects from
  100                           parent's feed then we lose the ptr value and start from
  101                           the first child again. Here we store the classid of the
 102                           last valid ptr (used when ptr is NULL). */
 103                        u32 last_ptr_id[TC_HTB_NUMPRIO];
 104                } inner;
 105        } un;
 106        struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */
 107        struct rb_node pq_node; /* node for event queue */
 108        psched_time_t pq_key;
 109
 110        int prio_activity;      /* for which prios are we active */
 111        enum htb_cmode cmode;   /* current mode of the class */
 112
 113        /* class attached filters */
 114        struct tcf_proto *filter_list;
 115        int filter_cnt;
 116
 117        int warned;             /* only one warning about non work conserving .. */
 118
 119        /* token bucket parameters */
 120        struct qdisc_rate_table *rate;  /* rate table of the class itself */
 121        struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
 122        long buffer, cbuffer;   /* token bucket depth/rate */
 123        psched_tdiff_t mbuffer; /* max wait time */
 124        long tokens, ctokens;   /* current number of tokens */
 125        psched_time_t t_c;      /* checkpoint time */
 126};
 127
 128struct htb_sched {
 129        struct Qdisc_class_hash clhash;
 130        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 131
 132        /* self list - roots of self generating tree */
 133        struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 134        int row_mask[TC_HTB_MAXDEPTH];
 135        struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 136        u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 137
 138        /* self wait list - roots of wait PQs per row */
 139        struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 140
 141        /* time of nearest event per level (row) */
 142        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 143
 144        int defcls;             /* class where unclassified flows go to */
 145
 146        /* filters for qdisc itself */
 147        struct tcf_proto *filter_list;
 148
 149        int rate2quantum;       /* quant = rate / rate2quantum */
 150        psched_time_t now;      /* cached dequeue time */
 151        struct qdisc_watchdog watchdog;
 152
 153        /* non shaped skbs; let them go directly thru */
 154        struct sk_buff_head direct_queue;
 155        int direct_qlen;        /* max qlen of above */
 156
 157        long direct_pkts;
 158};
 159
 160/* find class in global hash table using given handle */
 161static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 162{
 163        struct htb_sched *q = qdisc_priv(sch);
 164        struct Qdisc_class_common *clc;
 165
 166        clc = qdisc_class_find(&q->clhash, handle);
 167        if (clc == NULL)
 168                return NULL;
 169        return container_of(clc, struct htb_class, common);
 170}
 171
 172/**
 173 * htb_classify - classify a packet into class
 174 *
 175 * It returns NULL if the packet should be dropped or -1 if the packet
 176 * should be passed directly thru. In all other cases leaf class is returned.
  177 * We allow direct class selection by classid in priority. Then we examine
 178 * filters in qdisc and in inner nodes (if higher filter points to the inner
 179 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 180 * internal fifo (direct). These packets then go directly thru. If we still
  181 * have no valid leaf we try to use the MAJOR:default leaf. If still
  182 * unsuccessful, we finish and return the direct queue.
 183 */
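/* Worked example (editorial; hypothetical handles): with the qdisc installed
 * as 1:, a packet whose skb->priority equals the qdisc handle 1:0 goes to the
 * direct queue, one whose priority equals the classid of an existing leaf
 * such as 1:10 is queued to that leaf straight away, and anything else runs
 * through the filter chain and finally falls back to the 1:<defcls> leaf (or
 * to the direct queue if that class is missing or is an inner class). */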
 184#define HTB_DIRECT (struct htb_class*)-1
 185
 186static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 187                                      int *qerr)
 188{
 189        struct htb_sched *q = qdisc_priv(sch);
 190        struct htb_class *cl;
 191        struct tcf_result res;
 192        struct tcf_proto *tcf;
 193        int result;
 194
  195        /* allow selecting a class by setting skb->priority to a valid classid;
 196           note that nfmark can be used too by attaching filter fw with no
 197           rules in it */
 198        if (skb->priority == sch->handle)
 199                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 200        if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 201                return cl;
 202
 203        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 204        tcf = q->filter_list;
 205        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 206#ifdef CONFIG_NET_CLS_ACT
 207                switch (result) {
 208                case TC_ACT_QUEUED:
 209                case TC_ACT_STOLEN:
 210                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 211                case TC_ACT_SHOT:
 212                        return NULL;
 213                }
 214#endif
 215                if ((cl = (void *)res.class) == NULL) {
 216                        if (res.classid == sch->handle)
 217                                return HTB_DIRECT;      /* X:0 (direct flow) */
 218                        if ((cl = htb_find(res.classid, sch)) == NULL)
 219                                break;  /* filter selected invalid classid */
 220                }
 221                if (!cl->level)
 222                        return cl;      /* we hit leaf; return it */
 223
 224                /* we have got inner class; apply inner filter chain */
 225                tcf = cl->filter_list;
 226        }
 227        /* classification failed; try to use default class */
 228        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 229        if (!cl || cl->level)
 230                return HTB_DIRECT;      /* bad default .. this is safe bet */
 231        return cl;
 232}
 233
 234/**
 235 * htb_add_to_id_tree - adds class to the round robin list
 236 *
 237 * Routine adds class to the list (actually tree) sorted by classid.
 238 * Make sure that class is not already on such list for given prio.
 239 */
 240static void htb_add_to_id_tree(struct rb_root *root,
 241                               struct htb_class *cl, int prio)
 242{
 243        struct rb_node **p = &root->rb_node, *parent = NULL;
 244
 245        while (*p) {
 246                struct htb_class *c;
 247                parent = *p;
 248                c = rb_entry(parent, struct htb_class, node[prio]);
 249
 250                if (cl->common.classid > c->common.classid)
 251                        p = &parent->rb_right;
 252                else
 253                        p = &parent->rb_left;
 254        }
 255        rb_link_node(&cl->node[prio], parent, p);
 256        rb_insert_color(&cl->node[prio], root);
 257}
 258
 259/**
 260 * htb_add_to_wait_tree - adds class to the event queue with delay
 261 *
  262 * The class is added to the priority event queue to indicate that the class
  263 * will change its mode at time cl->pq_key. Make sure that the class is not
  264 * already in the queue.
 265 */
 266static void htb_add_to_wait_tree(struct htb_sched *q,
 267                                 struct htb_class *cl, long delay)
 268{
 269        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 270
 271        cl->pq_key = q->now + delay;
 272        if (cl->pq_key == q->now)
 273                cl->pq_key++;
 274
 275        /* update the nearest event cache */
 276        if (q->near_ev_cache[cl->level] > cl->pq_key)
 277                q->near_ev_cache[cl->level] = cl->pq_key;
 278
 279        while (*p) {
 280                struct htb_class *c;
 281                parent = *p;
 282                c = rb_entry(parent, struct htb_class, pq_node);
 283                if (cl->pq_key >= c->pq_key)
 284                        p = &parent->rb_right;
 285                else
 286                        p = &parent->rb_left;
 287        }
 288        rb_link_node(&cl->pq_node, parent, p);
 289        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
 290}
 291
 292/**
 293 * htb_next_rb_node - finds next node in binary tree
 294 *
 295 * When we are past last key we return NULL.
 296 * Average complexity is 2 steps per call.
 297 */
 298static inline void htb_next_rb_node(struct rb_node **n)
 299{
 300        *n = rb_next(*n);
 301}
 302
 303/**
 304 * htb_add_class_to_row - add class to its row
 305 *
 306 * The class is added to row at priorities marked in mask.
 307 * It does nothing if mask == 0.
 308 */
 309static inline void htb_add_class_to_row(struct htb_sched *q,
 310                                        struct htb_class *cl, int mask)
 311{
 312        q->row_mask[cl->level] |= mask;
 313        while (mask) {
 314                int prio = ffz(~mask);
 315                mask &= ~(1 << prio);
 316                htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 317        }
 318}
 319
 320/* If this triggers, it is a bug in this code, but it need not be fatal */
 321static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 322{
 323        if (RB_EMPTY_NODE(rb)) {
 324                WARN_ON(1);
 325        } else {
 326                rb_erase(rb, root);
 327                RB_CLEAR_NODE(rb);
 328        }
 329}
 330
 331
 332/**
 333 * htb_remove_class_from_row - removes class from its row
 334 *
 335 * The class is removed from row at priorities marked in mask.
 336 * It does nothing if mask == 0.
 337 */
 338static inline void htb_remove_class_from_row(struct htb_sched *q,
 339                                                 struct htb_class *cl, int mask)
 340{
 341        int m = 0;
 342
 343        while (mask) {
 344                int prio = ffz(~mask);
 345
 346                mask &= ~(1 << prio);
 347                if (q->ptr[cl->level][prio] == cl->node + prio)
 348                        htb_next_rb_node(q->ptr[cl->level] + prio);
 349
 350                htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
 351                if (!q->row[cl->level][prio].rb_node)
 352                        m |= 1 << prio;
 353        }
 354        q->row_mask[cl->level] &= ~m;
 355}
 356
 357/**
  358 * htb_activate_prios - creates the active class's feed chain
 359 *
 360 * The class is connected to ancestors and/or appropriate rows
  361 * for priorities it is participating in. cl->cmode must be the new
 362 * (activated) mode. It does nothing if cl->prio_activity == 0.
 363 */
 364static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 365{
 366        struct htb_class *p = cl->parent;
 367        long m, mask = cl->prio_activity;
 368
 369        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 370                m = mask;
 371                while (m) {
 372                        int prio = ffz(~m);
 373                        m &= ~(1 << prio);
 374
 375                        if (p->un.inner.feed[prio].rb_node)
  376                                /* parent already has its feed in use so
  377                                   reset the bit in mask as parent is already ok */
 378                                mask &= ~(1 << prio);
 379
 380                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 381                }
 382                p->prio_activity |= mask;
 383                cl = p;
 384                p = cl->parent;
 385
 386        }
 387        if (cl->cmode == HTB_CAN_SEND && mask)
 388                htb_add_class_to_row(q, cl, mask);
 389}
 390
 391/**
 392 * htb_deactivate_prios - remove class from feed chain
 393 *
 394 * cl->cmode must represent old mode (before deactivation). It does
 395 * nothing if cl->prio_activity == 0. Class is removed from all feed
 396 * chains and rows.
 397 */
 398static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 399{
 400        struct htb_class *p = cl->parent;
 401        long m, mask = cl->prio_activity;
 402
 403        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 404                m = mask;
 405                mask = 0;
 406                while (m) {
 407                        int prio = ffz(~m);
 408                        m &= ~(1 << prio);
 409
 410                        if (p->un.inner.ptr[prio] == cl->node + prio) {
 411                                /* we are removing child which is pointed to from
 412                                   parent feed - forget the pointer but remember
 413                                   classid */
 414                                p->un.inner.last_ptr_id[prio] = cl->common.classid;
 415                                p->un.inner.ptr[prio] = NULL;
 416                        }
 417
 418                        htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
 419
 420                        if (!p->un.inner.feed[prio].rb_node)
 421                                mask |= 1 << prio;
 422                }
 423
 424                p->prio_activity &= ~mask;
 425                cl = p;
 426                p = cl->parent;
 427
 428        }
 429        if (cl->cmode == HTB_CAN_SEND && mask)
 430                htb_remove_class_from_row(q, cl, mask);
 431}
 432
 433static inline long htb_lowater(const struct htb_class *cl)
 434{
 435        if (htb_hysteresis)
 436                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 437        else
 438                return 0;
 439}
 440static inline long htb_hiwater(const struct htb_class *cl)
 441{
 442        if (htb_hysteresis)
 443                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 444        else
 445                return 0;
 446}
 447
 448
 449/**
 450 * htb_class_mode - computes and returns current class mode
 451 *
  452 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
  453 * is not HTB_CAN_SEND then *diff is set to the time difference from now
  454 * to the time when cl will change its state (used as the wait-tree delay).
  455 * It is also worth noting that the class mode doesn't change simply
  456 * at cl->{c,}tokens == 0 but rather there can be a hysteresis of
  457 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
  458 * mode transitions per time unit. The speed gain is about 1/6.
 459 */
 460static inline enum htb_cmode
 461htb_class_mode(struct htb_class *cl, long *diff)
 462{
 463        long toks;
 464
 465        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 466                *diff = -toks;
 467                return HTB_CANT_SEND;
 468        }
 469
 470        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 471                return HTB_CAN_SEND;
 472
 473        *diff = -toks;
 474        return HTB_MAY_BORROW;
 475}
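
/* Editorial illustration of htb_class_mode() with hysteresis disabled (both
 * watermarks 0): if cl->ctokens + *diff is negative the ceil bucket is empty,
 * so the class is HTB_CANT_SEND and *diff becomes the remaining deficit
 * (later used as the wait-tree delay); if the ceil bucket is fine but
 * cl->tokens + *diff is negative the class may only borrow (HTB_MAY_BORROW);
 * otherwise it is HTB_CAN_SEND. */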
 476
 477/**
  478 * htb_change_class_mode - changes class's mode
  479 *
  480 * This should be the only way to change a class's mode under normal
  481 * circumstances. The routine will update the feed list linkage, change the
  482 * mode and add the class to the wait event queue if appropriate. The new
  483 * mode should differ from the old one and cl->pq_key has to be valid if
  484 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 485 */
 486static void
 487htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
 488{
 489        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 490
 491        if (new_mode == cl->cmode)
 492                return;
 493
 494        if (cl->prio_activity) {        /* not necessary: speed optimization */
 495                if (cl->cmode != HTB_CANT_SEND)
 496                        htb_deactivate_prios(q, cl);
 497                cl->cmode = new_mode;
 498                if (new_mode != HTB_CANT_SEND)
 499                        htb_activate_prios(q, cl);
 500        } else
 501                cl->cmode = new_mode;
 502}
 503
 504/**
 505 * htb_activate - inserts leaf cl into appropriate active feeds
 506 *
 507 * Routine learns (new) priority of leaf and activates feed chain
 508 * for the prio. It can be called on already active leaf safely.
 509 * It also adds leaf into droplist.
 510 */
 511static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 512{
 513        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 514
 515        if (!cl->prio_activity) {
 516                cl->prio_activity = 1 << cl->prio;
 517                htb_activate_prios(q, cl);
 518                list_add_tail(&cl->un.leaf.drop_list,
 519                              q->drops + cl->prio);
 520        }
 521}
 522
 523/**
 524 * htb_deactivate - remove leaf cl from active feeds
 525 *
  526 * Make sure that the leaf is active. In other words it can't be called
  527 * with a non-active leaf. It also removes the class from the drop list.
 528 */
 529static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 530{
 531        WARN_ON(!cl->prio_activity);
 532
 533        htb_deactivate_prios(q, cl);
 534        cl->prio_activity = 0;
 535        list_del_init(&cl->un.leaf.drop_list);
 536}
 537
 538static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 539{
 540        int uninitialized_var(ret);
 541        struct htb_sched *q = qdisc_priv(sch);
 542        struct htb_class *cl = htb_classify(skb, sch, &ret);
 543
 544        if (cl == HTB_DIRECT) {
 545                /* enqueue to helper queue */
 546                if (q->direct_queue.qlen < q->direct_qlen) {
 547                        __skb_queue_tail(&q->direct_queue, skb);
 548                        q->direct_pkts++;
 549                } else {
 550                        kfree_skb(skb);
 551                        sch->qstats.drops++;
 552                        return NET_XMIT_DROP;
 553                }
 554#ifdef CONFIG_NET_CLS_ACT
 555        } else if (!cl) {
 556                if (ret & __NET_XMIT_BYPASS)
 557                        sch->qstats.drops++;
 558                kfree_skb(skb);
 559                return ret;
 560#endif
 561        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
 562                if (net_xmit_drop_count(ret)) {
 563                        sch->qstats.drops++;
 564                        cl->qstats.drops++;
 565                }
 566                return ret;
 567        } else {
 568                cl->bstats.packets +=
 569                        skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 570                cl->bstats.bytes += qdisc_pkt_len(skb);
 571                htb_activate(q, cl);
 572        }
 573
 574        sch->q.qlen++;
 575        sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 576        sch->bstats.bytes += qdisc_pkt_len(skb);
 577        return NET_XMIT_SUCCESS;
 578}
 579
 580static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
 581{
 582        long toks = diff + cl->tokens;
 583
 584        if (toks > cl->buffer)
 585                toks = cl->buffer;
 586        toks -= (long) qdisc_l2t(cl->rate, bytes);
 587        if (toks <= -cl->mbuffer)
 588                toks = 1 - cl->mbuffer;
 589
 590        cl->tokens = toks;
 591}
 592
 593static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
 594{
 595        long toks = diff + cl->ctokens;
 596
 597        if (toks > cl->cbuffer)
 598                toks = cl->cbuffer;
 599        toks -= (long) qdisc_l2t(cl->ceil, bytes);
 600        if (toks <= -cl->mbuffer)
 601                toks = 1 - cl->mbuffer;
 602
 603        cl->ctokens = toks;
 604}
 605
 606/**
 607 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 608 *
 609 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 610 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 611 * leaf and all ancestors and to rate bucket for ancestors at levels
 612 * "level" and higher. It also handles possible change of mode resulting
 613 * from the update. Note that mode can also increase here (MAY_BORROW to
  614 * CAN_SEND) because we can use a more precise clock than the event queue here.
  615 * In such a case we remove the class from the event queue first.
 616 */
 617static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 618                             int level, struct sk_buff *skb)
 619{
 620        int bytes = qdisc_pkt_len(skb);
 621        enum htb_cmode old_mode;
 622        long diff;
 623
 624        while (cl) {
 625                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 626                if (cl->level >= level) {
 627                        if (cl->level == level)
 628                                cl->xstats.lends++;
 629                        htb_accnt_tokens(cl, bytes, diff);
 630                } else {
 631                        cl->xstats.borrows++;
 632                        cl->tokens += diff;     /* we moved t_c; update tokens */
 633                }
 634                htb_accnt_ctokens(cl, bytes, diff);
 635                cl->t_c = q->now;
 636
 637                old_mode = cl->cmode;
 638                diff = 0;
 639                htb_change_class_mode(q, cl, &diff);
 640                if (old_mode != cl->cmode) {
 641                        if (old_mode != HTB_CAN_SEND)
 642                                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 643                        if (cl->cmode != HTB_CAN_SEND)
 644                                htb_add_to_wait_tree(q, cl, diff);
 645                }
 646
 647                /* update byte stats except for leaves which are already updated */
 648                if (cl->level) {
 649                        cl->bstats.bytes += bytes;
 650                        cl->bstats.packets += skb_is_gso(skb)?
 651                                        skb_shinfo(skb)->gso_segs:1;
 652                }
 653                cl = cl->parent;
 654        }
 655}
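
/* Editorial illustration of the charging rule above (hypothetical classids):
 * if a packet left leaf 1:101 while borrowing from a level-2 ancestor, then
 * 1:101 and its level-1 parent lie below the lending level and are charged
 * only against their ceil buckets (and counted as borrows), while the level-2
 * lender and every class above it are charged against both the rate and the
 * ceil buckets (the lender itself also accounts a lend). */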
 656
 657/**
 658 * htb_do_events - make mode changes to classes at the level
 659 *
 660 * Scans event queue for pending events and applies them. Returns time of
 661 * next pending event (0 for no event in pq).
  662 * Note: Applied are events that have cl->pq_key <= q->now.
 663 */
 664static psched_time_t htb_do_events(struct htb_sched *q, int level,
 665                                   unsigned long start)
 666{
 667        /* don't run for longer than 2 jiffies; 2 is used instead of
 668           1 to simplify things when jiffy is going to be incremented
 669           too soon */
 670        unsigned long stop_at = start + 2;
 671        while (time_before(jiffies, stop_at)) {
 672                struct htb_class *cl;
 673                long diff;
 674                struct rb_node *p = rb_first(&q->wait_pq[level]);
 675
 676                if (!p)
 677                        return 0;
 678
 679                cl = rb_entry(p, struct htb_class, pq_node);
 680                if (cl->pq_key > q->now)
 681                        return cl->pq_key;
 682
 683                htb_safe_rb_erase(p, q->wait_pq + level);
 684                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 685                htb_change_class_mode(q, cl, &diff);
 686                if (cl->cmode != HTB_CAN_SEND)
 687                        htb_add_to_wait_tree(q, cl, diff);
 688        }
 689        /* too much load - let's continue on next jiffie (including above) */
 690        return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ;
 691}
 692
  693/* Returns class->node+prio from id-tree where class's id is >= id. NULL
  694   if no such one exists. */
 695static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 696                                              u32 id)
 697{
 698        struct rb_node *r = NULL;
 699        while (n) {
 700                struct htb_class *cl =
 701                    rb_entry(n, struct htb_class, node[prio]);
 702
 703                if (id > cl->common.classid) {
 704                        n = n->rb_right;
 705                } else if (id < cl->common.classid) {
 706                        r = n;
 707                        n = n->rb_left;
 708                } else {
 709                        return n;
 710                }
 711        }
 712        return r;
 713}
 714
 715/**
 716 * htb_lookup_leaf - returns next leaf class in DRR order
 717 *
  718 * Find the leaf where the current feed pointer points.
 719 */
 720static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 721                                         struct rb_node **pptr, u32 * pid)
 722{
 723        int i;
 724        struct {
 725                struct rb_node *root;
 726                struct rb_node **pptr;
 727                u32 *pid;
 728        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 729
 730        BUG_ON(!tree->rb_node);
 731        sp->root = tree->rb_node;
 732        sp->pptr = pptr;
 733        sp->pid = pid;
 734
 735        for (i = 0; i < 65535; i++) {
 736                if (!*sp->pptr && *sp->pid) {
 737                        /* ptr was invalidated but id is valid - try to recover
 738                           the original or next ptr */
 739                        *sp->pptr =
 740                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 741                }
  742                *sp->pid = 0;   /* ptr is valid now so remove this hint as it
 743                                   can become out of date quickly */
 744                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 745                        *sp->pptr = sp->root;
 746                        while ((*sp->pptr)->rb_left)
 747                                *sp->pptr = (*sp->pptr)->rb_left;
 748                        if (sp > stk) {
 749                                sp--;
 750                                if (!*sp->pptr) {
 751                                        WARN_ON(1);
 752                                        return NULL;
 753                                }
 754                                htb_next_rb_node(sp->pptr);
 755                        }
 756                } else {
 757                        struct htb_class *cl;
 758                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 759                        if (!cl->level)
 760                                return cl;
 761                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
 762                        sp->pptr = cl->un.inner.ptr + prio;
 763                        sp->pid = cl->un.inner.last_ptr_id + prio;
 764                }
 765        }
 766        WARN_ON(1);
 767        return NULL;
 768}
 769
 770/* dequeues packet at given priority and level; call only if
 771   you are sure that there is active class at prio/level */
 772static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 773                                        int level)
 774{
 775        struct sk_buff *skb = NULL;
 776        struct htb_class *cl, *start;
 777        /* look initial class up in the row */
 778        start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
 779                                     q->ptr[level] + prio,
 780                                     q->last_ptr_id[level] + prio);
 781
 782        do {
 783next:
 784                if (unlikely(!cl))
 785                        return NULL;
 786
 787                /* class can be empty - it is unlikely but can be true if leaf
 788                   qdisc drops packets in enqueue routine or if someone used
 789                   graft operation on the leaf since last dequeue;
 790                   simply deactivate and skip such class */
 791                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 792                        struct htb_class *next;
 793                        htb_deactivate(q, cl);
 794
 795                        /* row/level might become empty */
 796                        if ((q->row_mask[level] & (1 << prio)) == 0)
 797                                return NULL;
 798
 799                        next = htb_lookup_leaf(q->row[level] + prio,
 800                                               prio, q->ptr[level] + prio,
 801                                               q->last_ptr_id[level] + prio);
 802
 803                        if (cl == start)        /* fix start if we just deleted it */
 804                                start = next;
 805                        cl = next;
 806                        goto next;
 807                }
 808
 809                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
 810                if (likely(skb != NULL))
 811                        break;
 812                if (!cl->warned) {
 813                        printk(KERN_WARNING
 814                               "htb: class %X isn't work conserving ?!\n",
 815                               cl->common.classid);
 816                        cl->warned = 1;
 817                }
 818
 819                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 820                                  ptr[0]) + prio);
 821                cl = htb_lookup_leaf(q->row[level] + prio, prio,
 822                                     q->ptr[level] + prio,
 823                                     q->last_ptr_id[level] + prio);
 824
 825        } while (cl != start);
 826
 827        if (likely(skb != NULL)) {
 828                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
 829                if (cl->un.leaf.deficit[level] < 0) {
 830                        cl->un.leaf.deficit[level] += cl->quantum;
 831                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 832                                          ptr[0]) + prio);
 833                }
  834                /* this used to be after charge_class but this constellation
 835                   gives us slightly better performance */
 836                if (!cl->un.leaf.q->q.qlen)
 837                        htb_deactivate(q, cl);
 838                htb_charge_class(q, cl, level, skb);
 839        }
 840        return skb;
 841}
 842
 843static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 844{
 845        struct sk_buff *skb = NULL;
 846        struct htb_sched *q = qdisc_priv(sch);
 847        int level;
 848        psched_time_t next_event;
 849        unsigned long start_at;
 850
 851        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 852        skb = __skb_dequeue(&q->direct_queue);
 853        if (skb != NULL) {
 854                sch->flags &= ~TCQ_F_THROTTLED;
 855                sch->q.qlen--;
 856                return skb;
 857        }
 858
 859        if (!sch->q.qlen)
 860                goto fin;
 861        q->now = psched_get_time();
 862        start_at = jiffies;
 863
 864        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 865
 866        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 867                /* common case optimization - skip event handler quickly */
 868                int m;
 869                psched_time_t event;
 870
 871                if (q->now >= q->near_ev_cache[level]) {
 872                        event = htb_do_events(q, level, start_at);
 873                        if (!event)
 874                                event = q->now + PSCHED_TICKS_PER_SEC;
 875                        q->near_ev_cache[level] = event;
 876                } else
 877                        event = q->near_ev_cache[level];
 878
 879                if (next_event > event)
 880                        next_event = event;
 881
 882                m = ~q->row_mask[level];
 883                while (m != (int)(-1)) {
 884                        int prio = ffz(m);
 885                        m |= 1 << prio;
 886                        skb = htb_dequeue_tree(q, prio, level);
 887                        if (likely(skb != NULL)) {
 888                                sch->q.qlen--;
 889                                sch->flags &= ~TCQ_F_THROTTLED;
 890                                goto fin;
 891                        }
 892                }
 893        }
 894        sch->qstats.overlimits++;
 895        qdisc_watchdog_schedule(&q->watchdog, next_event);
 896fin:
 897        return skb;
 898}
 899
  900/* try to drop from each class (by prio) until one succeeds */
 901static unsigned int htb_drop(struct Qdisc *sch)
 902{
 903        struct htb_sched *q = qdisc_priv(sch);
 904        int prio;
 905
 906        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 907                struct list_head *p;
 908                list_for_each(p, q->drops + prio) {
 909                        struct htb_class *cl = list_entry(p, struct htb_class,
 910                                                          un.leaf.drop_list);
 911                        unsigned int len;
 912                        if (cl->un.leaf.q->ops->drop &&
 913                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 914                                sch->q.qlen--;
 915                                if (!cl->un.leaf.q->q.qlen)
 916                                        htb_deactivate(q, cl);
 917                                return len;
 918                        }
 919                }
 920        }
 921        return 0;
 922}
 923
 924/* reset all classes */
  925/* always called under BH & queue lock */
 926static void htb_reset(struct Qdisc *sch)
 927{
 928        struct htb_sched *q = qdisc_priv(sch);
 929        struct htb_class *cl;
 930        struct hlist_node *n;
 931        unsigned int i;
 932
 933        for (i = 0; i < q->clhash.hashsize; i++) {
 934                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
 935                        if (cl->level)
 936                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 937                        else {
 938                                if (cl->un.leaf.q)
 939                                        qdisc_reset(cl->un.leaf.q);
 940                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 941                        }
 942                        cl->prio_activity = 0;
 943                        cl->cmode = HTB_CAN_SEND;
 944
 945                }
 946        }
 947        qdisc_watchdog_cancel(&q->watchdog);
 948        __skb_queue_purge(&q->direct_queue);
 949        sch->q.qlen = 0;
 950        memset(q->row, 0, sizeof(q->row));
 951        memset(q->row_mask, 0, sizeof(q->row_mask));
 952        memset(q->wait_pq, 0, sizeof(q->wait_pq));
 953        memset(q->ptr, 0, sizeof(q->ptr));
 954        for (i = 0; i < TC_HTB_NUMPRIO; i++)
 955                INIT_LIST_HEAD(q->drops + i);
 956}
 957
 958static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 959        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
 960        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
 961        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 962        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 963};
 964
 965static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 966{
 967        struct htb_sched *q = qdisc_priv(sch);
 968        struct nlattr *tb[TCA_HTB_INIT + 1];
 969        struct tc_htb_glob *gopt;
 970        int err;
 971        int i;
 972
 973        if (!opt)
 974                return -EINVAL;
 975
 976        err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
 977        if (err < 0)
 978                return err;
 979
 980        if (tb[TCA_HTB_INIT] == NULL) {
 981                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
 982                return -EINVAL;
 983        }
 984        gopt = nla_data(tb[TCA_HTB_INIT]);
 985        if (gopt->version != HTB_VER >> 16) {
 986                printk(KERN_ERR
 987                       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
 988                       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
 989                return -EINVAL;
 990        }
 991
 992        err = qdisc_class_hash_init(&q->clhash);
 993        if (err < 0)
 994                return err;
 995        for (i = 0; i < TC_HTB_NUMPRIO; i++)
 996                INIT_LIST_HEAD(q->drops + i);
 997
 998        qdisc_watchdog_init(&q->watchdog, sch);
 999        skb_queue_head_init(&q->direct_queue);
1000
1001        q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1002        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1003                q->direct_qlen = 2;
1004
1005        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1006                q->rate2quantum = 1;
1007        q->defcls = gopt->defcls;
1008
1009        return 0;
1010}
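
/* Editorial usage sketch (iproute2 commands with an illustrative device and
 * handles, not taken from this file):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 10 r2q 10
 *   tc class add dev eth0 parent 1: classid 1:10 htb rate 1mbit ceil 2mbit
 *
 * The "default 10" value reaches htb_init() as gopt->defcls and "r2q" as
 * gopt->rate2quantum; unclassified traffic is then looked up as 1:10 by
 * htb_classify() above. */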
1011
1012static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1013{
1014        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1015        struct htb_sched *q = qdisc_priv(sch);
1016        struct nlattr *nest;
1017        struct tc_htb_glob gopt;
1018
1019        spin_lock_bh(root_lock);
1020
1021        gopt.direct_pkts = q->direct_pkts;
1022        gopt.version = HTB_VER;
1023        gopt.rate2quantum = q->rate2quantum;
1024        gopt.defcls = q->defcls;
1025        gopt.debug = 0;
1026
1027        nest = nla_nest_start(skb, TCA_OPTIONS);
1028        if (nest == NULL)
1029                goto nla_put_failure;
1030        NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1031        nla_nest_end(skb, nest);
1032
1033        spin_unlock_bh(root_lock);
1034        return skb->len;
1035
1036nla_put_failure:
1037        spin_unlock_bh(root_lock);
1038        nla_nest_cancel(skb, nest);
1039        return -1;
1040}
1041
1042static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1043                          struct sk_buff *skb, struct tcmsg *tcm)
1044{
1045        struct htb_class *cl = (struct htb_class *)arg;
1046        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1047        struct nlattr *nest;
1048        struct tc_htb_opt opt;
1049
1050        spin_lock_bh(root_lock);
1051        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1052        tcm->tcm_handle = cl->common.classid;
1053        if (!cl->level && cl->un.leaf.q)
1054                tcm->tcm_info = cl->un.leaf.q->handle;
1055
1056        nest = nla_nest_start(skb, TCA_OPTIONS);
1057        if (nest == NULL)
1058                goto nla_put_failure;
1059
1060        memset(&opt, 0, sizeof(opt));
1061
1062        opt.rate = cl->rate->rate;
1063        opt.buffer = cl->buffer;
1064        opt.ceil = cl->ceil->rate;
1065        opt.cbuffer = cl->cbuffer;
1066        opt.quantum = cl->quantum;
1067        opt.prio = cl->prio;
1068        opt.level = cl->level;
1069        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1070
1071        nla_nest_end(skb, nest);
1072        spin_unlock_bh(root_lock);
1073        return skb->len;
1074
1075nla_put_failure:
1076        spin_unlock_bh(root_lock);
1077        nla_nest_cancel(skb, nest);
1078        return -1;
1079}
1080
1081static int
1082htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1083{
1084        struct htb_class *cl = (struct htb_class *)arg;
1085
1086        if (!cl->level && cl->un.leaf.q)
1087                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1088        cl->xstats.tokens = cl->tokens;
1089        cl->xstats.ctokens = cl->ctokens;
1090
1091        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1092            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1093            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1094                return -1;
1095
1096        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1097}
1098
1099static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1100                     struct Qdisc **old)
1101{
1102        struct htb_class *cl = (struct htb_class *)arg;
1103
1104        if (cl && !cl->level) {
1105                if (new == NULL &&
1106                    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1107                                             &pfifo_qdisc_ops,
1108                                             cl->common.classid))
1109                    == NULL)
1110                        return -ENOBUFS;
1111                sch_tree_lock(sch);
1112                *old = cl->un.leaf.q;
1113                cl->un.leaf.q = new;
1114                if (*old != NULL) {
1115                        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1116                        qdisc_reset(*old);
1117                }
1118                sch_tree_unlock(sch);
1119                return 0;
1120        }
1121        return -ENOENT;
1122}
1123
1124static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1125{
1126        struct htb_class *cl = (struct htb_class *)arg;
1127        return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1128}
1129
1130static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1131{
1132        struct htb_class *cl = (struct htb_class *)arg;
1133
1134        if (cl->un.leaf.q->q.qlen == 0)
1135                htb_deactivate(qdisc_priv(sch), cl);
1136}
1137
1138static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1139{
1140        struct htb_class *cl = htb_find(classid, sch);
1141        if (cl)
1142                cl->refcnt++;
1143        return (unsigned long)cl;
1144}
1145
1146static inline int htb_parent_last_child(struct htb_class *cl)
1147{
1148        if (!cl->parent)
1149                /* the root class */
1150                return 0;
1151        if (cl->parent->children > 1)
1152                /* not the last child */
1153                return 0;
1154        return 1;
1155}
1156
1157static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1158                               struct Qdisc *new_q)
1159{
1160        struct htb_class *parent = cl->parent;
1161
1162        WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1163
1164        if (parent->cmode != HTB_CAN_SEND)
1165                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1166
1167        parent->level = 0;
1168        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1169        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1170        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1171        parent->tokens = parent->buffer;
1172        parent->ctokens = parent->cbuffer;
1173        parent->t_c = psched_get_time();
1174        parent->cmode = HTB_CAN_SEND;
1175}
1176
1177static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1178{
1179        if (!cl->level) {
1180                WARN_ON(!cl->un.leaf.q);
1181                qdisc_destroy(cl->un.leaf.q);
1182        }
1183        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1184        qdisc_put_rtab(cl->rate);
1185        qdisc_put_rtab(cl->ceil);
1186
1187        tcf_destroy_chain(&cl->filter_list);
1188        kfree(cl);
1189}
1190
 1191/* always called under BH & queue lock */
1192static void htb_destroy(struct Qdisc *sch)
1193{
1194        struct htb_sched *q = qdisc_priv(sch);
1195        struct hlist_node *n, *next;
1196        struct htb_class *cl;
1197        unsigned int i;
1198
1199        qdisc_watchdog_cancel(&q->watchdog);
1200        /* This line used to be after htb_destroy_class call below
1201           and surprisingly it worked in 2.4. But it must precede it
 1202           because the filter needs its target class alive to be able to call
1203           unbind_filter on it (without Oops). */
1204        tcf_destroy_chain(&q->filter_list);
1205
1206        for (i = 0; i < q->clhash.hashsize; i++) {
1207                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1208                        tcf_destroy_chain(&cl->filter_list);
1209        }
1210        for (i = 0; i < q->clhash.hashsize; i++) {
1211                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1212                                          common.hnode)
1213                        htb_destroy_class(sch, cl);
1214        }
1215        qdisc_class_hash_destroy(&q->clhash);
1216        __skb_queue_purge(&q->direct_queue);
1217}
1218
1219static int htb_delete(struct Qdisc *sch, unsigned long arg)
1220{
1221        struct htb_sched *q = qdisc_priv(sch);
1222        struct htb_class *cl = (struct htb_class *)arg;
1223        unsigned int qlen;
1224        struct Qdisc *new_q = NULL;
1225        int last_child = 0;
1226
 1227        // TODO: why don't we allow deleting a subtree ? references ? does
 1228        // the tc subsys guarantee us that in htb_destroy it holds no class
1229        // refs so that we can remove children safely there ?
1230        if (cl->children || cl->filter_cnt)
1231                return -EBUSY;
1232
1233        if (!cl->level && htb_parent_last_child(cl)) {
1234                new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1235                                          &pfifo_qdisc_ops,
1236                                          cl->parent->common.classid);
1237                last_child = 1;
1238        }
1239
1240        sch_tree_lock(sch);
1241
1242        if (!cl->level) {
1243                qlen = cl->un.leaf.q->q.qlen;
1244                qdisc_reset(cl->un.leaf.q);
1245                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1246        }
1247
1248        /* delete from hash and active; remainder in destroy_class */
1249        qdisc_class_hash_remove(&q->clhash, &cl->common);
1250        if (cl->parent)
1251                cl->parent->children--;
1252
1253        if (cl->prio_activity)
1254                htb_deactivate(q, cl);
1255
1256        if (cl->cmode != HTB_CAN_SEND)
1257                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1258
1259        if (last_child)
1260                htb_parent_to_leaf(q, cl, new_q);
1261
1262        if (--cl->refcnt == 0)
1263                htb_destroy_class(sch, cl);
1264
1265        sch_tree_unlock(sch);
1266        return 0;
1267}
1268
1269static void htb_put(struct Qdisc *sch, unsigned long arg)
1270{
1271        struct htb_class *cl = (struct htb_class *)arg;
1272
1273        if (--cl->refcnt == 0)
1274                htb_destroy_class(sch, cl);
1275}
1276
1277static int htb_change_class(struct Qdisc *sch, u32 classid,
1278                            u32 parentid, struct nlattr **tca,
1279                            unsigned long *arg)
1280{
1281        int err = -EINVAL;
1282        struct htb_sched *q = qdisc_priv(sch);
1283        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1284        struct nlattr *opt = tca[TCA_OPTIONS];
1285        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1286        struct nlattr *tb[TCA_HTB_RTAB + 1];
1287        struct tc_htb_opt *hopt;
1288
1289        /* extract all subattrs from opt attr */
1290        if (!opt)
1291                goto failure;
1292
1293        err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1294        if (err < 0)
1295                goto failure;
1296
1297        err = -EINVAL;
1298        if (tb[TCA_HTB_PARMS] == NULL)
1299                goto failure;
1300
1301        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1302
1303        hopt = nla_data(tb[TCA_HTB_PARMS]);
1304
1305        rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1306        ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1307        if (!rtab || !ctab)
1308                goto failure;
1309
1310        if (!cl) {              /* new class */
1311                struct Qdisc *new_q;
1312                int prio;
1313                struct {
1314                        struct nlattr           nla;
1315                        struct gnet_estimator   opt;
1316                } est = {
1317                        .nla = {
1318                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1319                                .nla_type       = TCA_RATE,
1320                        },
1321                        .opt = {
1322                                /* 4s interval, 16s averaging constant */
1323                                .interval       = 2,
1324                                .ewma_log       = 2,
1325                        },
1326                };
1327
1328                /* check for valid classid */
1329                if (!classid || TC_H_MAJ(classid ^ sch->handle)
1330                    || htb_find(classid, sch))
1331                        goto failure;
1332
1333                /* check maximal depth */
1334                if (parent && parent->parent && parent->parent->level < 2) {
1335                        printk(KERN_ERR "htb: tree is too deep\n");
1336                        goto failure;
1337                }
1338                err = -ENOBUFS;
1339                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1340                        goto failure;
1341
1342                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1343                                        qdisc_root_sleeping_lock(sch),
1344                                        tca[TCA_RATE] ? : &est.nla);
1345                if (err) {
1346                        kfree(cl);
1347                        goto failure;
1348                }
1349
1350                cl->refcnt = 1;
1351                cl->children = 0;
1352                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1353                RB_CLEAR_NODE(&cl->pq_node);
1354
1355                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1356                        RB_CLEAR_NODE(&cl->node[prio]);
1357
1358                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 1359                   so it can't be used inside of sch_tree_lock
1360                   -- thanks to Karlis Peisenieks */
1361                new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1362                                          &pfifo_qdisc_ops, classid);
1363                sch_tree_lock(sch);
1364                if (parent && !parent->level) {
1365                        unsigned int qlen = parent->un.leaf.q->q.qlen;
1366
1367                        /* turn parent into inner node */
1368                        qdisc_reset(parent->un.leaf.q);
1369                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1370                        qdisc_destroy(parent->un.leaf.q);
1371                        if (parent->prio_activity)
1372                                htb_deactivate(q, parent);
1373
1374                        /* remove from evt list because of level change */
1375                        if (parent->cmode != HTB_CAN_SEND) {
1376                                htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1377                                parent->cmode = HTB_CAN_SEND;
1378                        }
1379                        parent->level = (parent->parent ? parent->parent->level
1380                                         : TC_HTB_MAXDEPTH) - 1;
1381                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1382                }
1383                /* leaf (we) needs elementary qdisc */
1384                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1385
1386                cl->common.classid = classid;
1387                cl->parent = parent;
1388
1389                /* set class to be in HTB_CAN_SEND state */
1390                cl->tokens = hopt->buffer;
1391                cl->ctokens = hopt->cbuffer;
1392                cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;        /* 1min */
1393                cl->t_c = psched_get_time();
1394                cl->cmode = HTB_CAN_SEND;
1395
1396                /* attach to the hash list and parent's family */
1397                qdisc_class_hash_insert(&q->clhash, &cl->common);
1398                if (parent)
1399                        parent->children++;
1400        } else {
1401                if (tca[TCA_RATE]) {
1402                        err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1403                                                    qdisc_root_sleeping_lock(sch),
1404                                                    tca[TCA_RATE]);
1405                        if (err)
1406                                return err;
1407                }
1408                sch_tree_lock(sch);
1409        }
1410
 1411        /* there used to be a nasty bug here: we have to check that the node
 1412           is really a leaf before changing cl->un.leaf ! */
1413        if (!cl->level) {
1414                cl->quantum = rtab->rate.rate / q->rate2quantum;
1415                if (!hopt->quantum && cl->quantum < 1000) {
1416                        printk(KERN_WARNING
1417                               "HTB: quantum of class %X is small. Consider r2q change.\n",
1418                               cl->common.classid);
1419                        cl->quantum = 1000;
1420                }
1421                if (!hopt->quantum && cl->quantum > 200000) {
1422                        printk(KERN_WARNING
1423                               "HTB: quantum of class %X is big. Consider r2q change.\n",
1424                               cl->common.classid);
1425                        cl->quantum = 200000;
1426                }
1427                if (hopt->quantum)
1428                        cl->quantum = hopt->quantum;
1429                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1430                        cl->prio = TC_HTB_NUMPRIO - 1;
1431        }
1432
1433        cl->buffer = hopt->buffer;
1434        cl->cbuffer = hopt->cbuffer;
1435        if (cl->rate)
1436                qdisc_put_rtab(cl->rate);
1437        cl->rate = rtab;
1438        if (cl->ceil)
1439                qdisc_put_rtab(cl->ceil);
1440        cl->ceil = ctab;
1441        sch_tree_unlock(sch);
1442
1443        qdisc_class_hash_grow(sch, &q->clhash);
1444
1445        *arg = (unsigned long)cl;
1446        return 0;
1447
1448failure:
1449        if (rtab)
1450                qdisc_put_rtab(rtab);
1451        if (ctab)
1452                qdisc_put_rtab(ctab);
1453        return err;
1454}
1455
1456static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1457{
1458        struct htb_sched *q = qdisc_priv(sch);
1459        struct htb_class *cl = (struct htb_class *)arg;
1460        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1461
1462        return fl;
1463}
1464
1465static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1466                                     u32 classid)
1467{
1468        struct htb_class *cl = htb_find(classid, sch);
1469
1470        /*if (cl && !cl->level) return 0;
1471           The line above used to be there to prevent attaching filters to
1472           leaves. But at least tc_index filter uses this just to get class
 1473           for other reasons, so we have to allow for it.
1474           ----
1475           19.6.2002 As Werner explained it is ok - bind filter is just
1476           another way to "lock" the class - unlike "get" this lock can
1477           be broken by class during destroy IIUC.
1478         */
1479        if (cl)
1480                cl->filter_cnt++;
1481        return (unsigned long)cl;
1482}
1483
1484static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1485{
1486        struct htb_class *cl = (struct htb_class *)arg;
1487
1488        if (cl)
1489                cl->filter_cnt--;
1490}
1491
1492static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1493{
1494        struct htb_sched *q = qdisc_priv(sch);
1495        struct htb_class *cl;
1496        struct hlist_node *n;
1497        unsigned int i;
1498
1499        if (arg->stop)
1500                return;
1501
1502        for (i = 0; i < q->clhash.hashsize; i++) {
1503                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1504                        if (arg->count < arg->skip) {
1505                                arg->count++;
1506                                continue;
1507                        }
1508                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1509                                arg->stop = 1;
1510                                return;
1511                        }
1512                        arg->count++;
1513                }
1514        }
1515}
1516
1517static const struct Qdisc_class_ops htb_class_ops = {
1518        .graft          =       htb_graft,
1519        .leaf           =       htb_leaf,
1520        .qlen_notify    =       htb_qlen_notify,
1521        .get            =       htb_get,
1522        .put            =       htb_put,
1523        .change         =       htb_change_class,
1524        .delete         =       htb_delete,
1525        .walk           =       htb_walk,
1526        .tcf_chain      =       htb_find_tcf,
1527        .bind_tcf       =       htb_bind_filter,
1528        .unbind_tcf     =       htb_unbind_filter,
1529        .dump           =       htb_dump_class,
1530        .dump_stats     =       htb_dump_class_stats,
1531};
1532
1533static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1534        .next           =       NULL,
1535        .cl_ops         =       &htb_class_ops,
1536        .id             =       "htb",
1537        .priv_size      =       sizeof(struct htb_sched),
1538        .enqueue        =       htb_enqueue,
1539        .dequeue        =       htb_dequeue,
1540        .peek           =       qdisc_peek_dequeued,
1541        .drop           =       htb_drop,
1542        .init           =       htb_init,
1543        .reset          =       htb_reset,
1544        .destroy        =       htb_destroy,
1545        .change         =       NULL /* htb_change */,
1546        .dump           =       htb_dump,
1547        .owner          =       THIS_MODULE,
1548};
1549
1550static int __init htb_module_init(void)
1551{
1552        return register_qdisc(&htb_qdisc_ops);
1553}
1554static void __exit htb_module_exit(void)
1555{
1556        unregister_qdisc(&htb_qdisc_ops);
1557}
1558
1559module_init(htb_module_init)
1560module_exit(htb_module_exit)
1561MODULE_LICENSE("GPL");
1562