   1/*
   2 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Martin Devera, <devik@cdi.cz>
  10 *
  11 * Credits (in time order) for older HTB versions:
  12 *              Stef Coene <stef.coene@docum.org>
  13 *                      HTB support at LARTC mailing list
  14 *              Ondrej Kraus, <krauso@barr.cz>
  15 *                      found missing INIT_QDISC(htb)
  16 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  17 *                      helped a lot to locate nasty class stall bug
  18 *              Andi Kleen, Jamal Hadi, Bert Hubert
  19 *                      code review and helpful comments on shaping
  20 *              Tomasz Wrona, <tw@eter.tym.pl>
  21 *                      created test case so that I was able to fix nasty bug
  22 *              Wilfried Weissmann
  23 *                      spotted bug in dequeue code and helped with fix
  24 *              Jiri Fojtasek
  25 *                      fixed requeue routine
  26 *              and many others. thanks.
  27 */
  28#include <linux/module.h>
  29#include <linux/moduleparam.h>
  30#include <linux/types.h>
  31#include <linux/kernel.h>
  32#include <linux/string.h>
  33#include <linux/errno.h>
  34#include <linux/skbuff.h>
  35#include <linux/list.h>
  36#include <linux/compiler.h>
  37#include <linux/rbtree.h>
  38#include <net/netlink.h>
  39#include <net/pkt_sched.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
  45    it allows assigning a priority to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
  49    Each class is assigned a level. Leaves are ALWAYS level 0 and root
  50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
  51    one less than their parent.
  52*/
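/* Worked example of the level rule (illustrative classids; TC_HTB_MAXDEPTH
   is typically 8): in a hierarchy 1:1 (root) -> 1:2 (inner) -> 1:10 (leaf),
   the root class 1:1 sits at level TC_HTB_MAXDEPTH-1 == 7, the interior
   node 1:2 at level 6, and the leaf 1:10, as always, at level 0. */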
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  55#define HTB_VER 0x30011         /* major must match the version number supplied by TC */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
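/* A usage sketch, assuming the module is built as sch_htb (the paths and
 * parameter name follow from module_param above):
 *
 *   modprobe sch_htb htb_hysteresis=1
 *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 */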
  64
  65/* used internally to keep the status of a single class */
  66enum htb_cmode {
  67        HTB_CANT_SEND,          /* class can't send and can't borrow */
  68        HTB_MAY_BORROW,         /* class can't send but may borrow */
  69        HTB_CAN_SEND            /* class can send */
  70};
  71
  72/* interior & leaf nodes; props specific to leaves are marked L: */
  73struct htb_class {
  74        struct Qdisc_class_common common;
  75        /* general class parameters */
  76        struct gnet_stats_basic bstats;
  77        struct gnet_stats_queue qstats;
  78        struct gnet_stats_rate_est rate_est;
  79        struct tc_htb_xstats xstats;    /* our special stats */
  80        int refcnt;             /* usage count of this class */
  81
  82        /* topology */
  83        int level;              /* our level (see above) */
  84        unsigned int children;
  85        struct htb_class *parent;       /* parent class */
  86
  87        union {
  88                struct htb_class_leaf {
  89                        struct Qdisc *q;
  90                        int prio;
  91                        int aprio;
  92                        int quantum;
  93                        int deficit[TC_HTB_MAXDEPTH];
  94                        struct list_head drop_list;
  95                } leaf;
  96                struct htb_class_inner {
  97                        struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
  98                        struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
  99                        /* When a class changes from state 1->2 and disconnects
 100                           from its parent's feed, we lose the ptr value and would
 101                           start from the first child again. Here we store the
 102                           classid of the last valid ptr (used when ptr is NULL). */
 103                        u32 last_ptr_id[TC_HTB_NUMPRIO];
 104                } inner;
 105        } un;
 106        struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */
 107        struct rb_node pq_node; /* node for event queue */
 108        psched_time_t pq_key;
 109
 110        int prio_activity;      /* for which prios are we active */
 111        enum htb_cmode cmode;   /* current mode of the class */
 112
 113        /* class attached filters */
 114        struct tcf_proto *filter_list;
 115        int filter_cnt;
 116
 117        int warned;             /* only one warning about non work conserving .. */
 118
 119        /* token bucket parameters */
 120        struct qdisc_rate_table *rate;  /* rate table of the class itself */
 121        struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
 122        long buffer, cbuffer;   /* token bucket depth/rate */
 123        psched_tdiff_t mbuffer; /* max wait time */
 124        long tokens, ctokens;   /* current number of tokens */
 125        psched_time_t t_c;      /* checkpoint time */
 126
 127        int prio;               /* backup of un.leaf.prio for parent-to-leaf return */
 128        int quantum;            /* backup of un.leaf.quantum; eventually a full */
 129                                /* replacement of the un.leaf originals should be done. */
 130};
 131
 132static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 133                           int size)
 134{
 135        /* length-to-time: transmission time of "size" bytes at this rate */
 136        return qdisc_l2t(rate, size);
 137}
 138
 139struct htb_sched {
 140        struct Qdisc_class_hash clhash;
 141        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 142
 143        /* self list - roots of self generating tree */
 144        struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 145        int row_mask[TC_HTB_MAXDEPTH];
 146        struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 147        u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 148
 149        /* self wait list - roots of wait PQs per row */
 150        struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 151
 152        /* time of nearest event per level (row) */
 153        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 154
 155        /* whether we hit a non-work-conserving class during this dequeue; */
 156        int nwc_hit;            /* used to disable the mindelay complaint in dequeue */
 157
 158        int defcls;             /* class where unclassified flows go to */
 159
 160        /* filters for qdisc itself */
 161        struct tcf_proto *filter_list;
 162
 163        int rate2quantum;       /* quant = rate / rate2quantum */
 164        psched_time_t now;      /* cached dequeue time */
 165        struct qdisc_watchdog watchdog;
 166
 167        /* non shaped skbs; let them go directly thru */
 168        struct sk_buff_head direct_queue;
 169        int direct_qlen;        /* max qlen of above */
 170
 171        long direct_pkts;
 172};
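/* A rough picture of the self feed (see htb_activate_prios): row[l][p]
   holds the HTB_CAN_SEND classes of level l whose subtree has an active
   leaf at prio p; ptr[l][p] is the current DRR position in that tree, and
   last_ptr_id[l][p] remembers the classid if that pointer is invalidated. */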
 173
 174/* find class in global hash table using given handle */
 175static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 176{
 177        struct htb_sched *q = qdisc_priv(sch);
 178        struct Qdisc_class_common *clc;
 179
 180        clc = qdisc_class_find(&q->clhash, handle);
 181        if (clc == NULL)
 182                return NULL;
 183        return container_of(clc, struct htb_class, common);
 184}
 185
 186/**
 187 * htb_classify - classify a packet into a class
 188 *
 189 * It returns NULL if the packet should be dropped or -1 if the packet
 190 * should be passed directly thru. In all other cases a leaf class is returned.
 191 * We allow direct class selection by classid in skb->priority. Then we examine
 192 * filters in the qdisc and in inner nodes (if a higher filter points to the
 193 * inner node). If we end up with classid MAJOR:0 we enqueue the skb into a
 194 * special internal fifo (direct). These packets then go directly thru. If we
 195 * still have no valid leaf we try to use the MAJOR:default leaf. If that is
 196 * still unsuccessful, we finish and use the direct queue.
 197 */
 198#define HTB_DIRECT ((struct htb_class *)-1)
 199
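/* Illustration of the precedence above, with a hypothetical qdisc handle 1:
 * and default class 20:
 *
 *   skb->priority == 1:0              -> HTB_DIRECT
 *   skb->priority == 1:10 (a leaf)    -> class 1:10
 *   a filter verdict of TC_ACT_SHOT   -> NULL (drop)
 *   a filter returning classid 1:0    -> HTB_DIRECT
 *   no filter match                   -> leaf 1:20, or HTB_DIRECT when 1:20
 *                                        is missing or is not a leaf
 */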
 200static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 201                                      int *qerr)
 202{
 203        struct htb_sched *q = qdisc_priv(sch);
 204        struct htb_class *cl;
 205        struct tcf_result res;
 206        struct tcf_proto *tcf;
 207        int result;
 208
 209        /* allow selecting a class by setting skb->priority to a valid classid;
 210           note that nfmark can be used too, by attaching an fw filter with no
 211           rules in it */
 212        if (skb->priority == sch->handle)
 213                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 214        if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 215                return cl;
 216
 217        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 218        tcf = q->filter_list;
 219        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 220#ifdef CONFIG_NET_CLS_ACT
 221                switch (result) {
 222                case TC_ACT_QUEUED:
 223                case TC_ACT_STOLEN:
 224                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 225                case TC_ACT_SHOT:
 226                        return NULL;
 227                }
 228#endif
 229                if ((cl = (void *)res.class) == NULL) {
 230                        if (res.classid == sch->handle)
 231                                return HTB_DIRECT;      /* X:0 (direct flow) */
 232                        if ((cl = htb_find(res.classid, sch)) == NULL)
 233                                break;  /* filter selected invalid classid */
 234                }
 235                if (!cl->level)
 236                        return cl;      /* we hit leaf; return it */
 237
 238                /* we have got inner class; apply inner filter chain */
 239                tcf = cl->filter_list;
 240        }
 241        /* classification failed; try to use default class */
 242        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 243        if (!cl || cl->level)
 244                return HTB_DIRECT;      /* bad default; the direct queue is a safe bet */
 245        return cl;
 246}
 247
 248/**
 249 * htb_add_to_id_tree - adds class to the round robin list
 250 *
 251 * The routine adds the class to a list (actually a tree) sorted by classid.
 252 * Make sure that the class is not already on such a list for the given prio.
 253 */
 254static void htb_add_to_id_tree(struct rb_root *root,
 255                               struct htb_class *cl, int prio)
 256{
 257        struct rb_node **p = &root->rb_node, *parent = NULL;
 258
 259        while (*p) {
 260                struct htb_class *c;
 261                parent = *p;
 262                c = rb_entry(parent, struct htb_class, node[prio]);
 263
 264                if (cl->common.classid > c->common.classid)
 265                        p = &parent->rb_right;
 266                else
 267                        p = &parent->rb_left;
 268        }
 269        rb_link_node(&cl->node[prio], parent, p);
 270        rb_insert_color(&cl->node[prio], root);
 271}
 272
 273/**
 274 * htb_add_to_wait_tree - adds class to the event queue with delay
 275 *
 276 * The class is added to the priority event queue to indicate that the class
 277 * will change its mode at time cl->pq_key. Make sure that the class is not
 278 * already in the queue.
 279 */
 280static void htb_add_to_wait_tree(struct htb_sched *q,
 281                                 struct htb_class *cl, long delay)
 282{
 283        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 284
 285        cl->pq_key = q->now + delay;
 286        if (cl->pq_key == q->now)
 287                cl->pq_key++;
 288
 289        /* update the nearest event cache */
 290        if (q->near_ev_cache[cl->level] > cl->pq_key)
 291                q->near_ev_cache[cl->level] = cl->pq_key;
 292
 293        while (*p) {
 294                struct htb_class *c;
 295                parent = *p;
 296                c = rb_entry(parent, struct htb_class, pq_node);
 297                if (cl->pq_key >= c->pq_key)
 298                        p = &parent->rb_right;
 299                else
 300                        p = &parent->rb_left;
 301        }
 302        rb_link_node(&cl->pq_node, parent, p);
 303        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
 304}
 305
 306/**
 307 * htb_next_rb_node - finds the next node in a binary tree
 308 *
 309 * When we are past the last key, *n is set to NULL.
 310 * Average complexity is 2 steps per call.
 311 */
 312static inline void htb_next_rb_node(struct rb_node **n)
 313{
 314        *n = rb_next(*n);
 315}
 316
 317/**
 318 * htb_add_class_to_row - add class to its row
 319 *
 320 * The class is added to row at priorities marked in mask.
 321 * It does nothing if mask == 0.
 322 */
 323static inline void htb_add_class_to_row(struct htb_sched *q,
 324                                        struct htb_class *cl, int mask)
 325{
 326        q->row_mask[cl->level] |= mask;
 327        while (mask) {
 328                int prio = ffz(~mask);
 329                mask &= ~(1 << prio);
 330                htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 331        }
 332}
 333
 334/* If this triggers, it is a bug in this code, but it need not be fatal */
 335static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 336{
 337        if (RB_EMPTY_NODE(rb)) {
 338                WARN_ON(1);
 339        } else {
 340                rb_erase(rb, root);
 341                RB_CLEAR_NODE(rb);
 342        }
 343}
 344
 345
 346/**
 347 * htb_remove_class_from_row - removes class from its row
 348 *
 349 * The class is removed from row at priorities marked in mask.
 350 * It does nothing if mask == 0.
 351 */
 352static inline void htb_remove_class_from_row(struct htb_sched *q,
 353                                                 struct htb_class *cl, int mask)
 354{
 355        int m = 0;
 356
 357        while (mask) {
 358                int prio = ffz(~mask);
 359
 360                mask &= ~(1 << prio);
 361                if (q->ptr[cl->level][prio] == cl->node + prio)
 362                        htb_next_rb_node(q->ptr[cl->level] + prio);
 363
 364                htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
 365                if (!q->row[cl->level][prio].rb_node)
 366                        m |= 1 << prio;
 367        }
 368        q->row_mask[cl->level] &= ~m;
 369}
 370
 371/**
 372 * htb_activate_prios - creates the active class's feed chain
 373 *
 374 * The class is connected to ancestors and/or appropriate rows
 375 * for the priorities it participates in. cl->cmode must be the new
 376 * (activated) mode. It does nothing if cl->prio_activity == 0.
 377 */
 378static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 379{
 380        struct htb_class *p = cl->parent;
 381        long m, mask = cl->prio_activity;
 382
 383        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 384                m = mask;
 385                while (m) {
 386                        int prio = ffz(~m);
 387                        m &= ~(1 << prio);
 388
 389                        if (p->un.inner.feed[prio].rb_node)
 390                                /* parent already has its feed in use, so
 391                                   reset the bit in mask; parent is already ok */
 392                                mask &= ~(1 << prio);
 393
 394                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 395                }
 396                p->prio_activity |= mask;
 397                cl = p;
 398                p = cl->parent;
 399
 400        }
 401        if (cl->cmode == HTB_CAN_SEND && mask)
 402                htb_add_class_to_row(q, cl, mask);
 403}
 404
 405/**
 406 * htb_deactivate_prios - remove class from feed chain
 407 *
 408 * cl->cmode must represent the old mode (before deactivation). It does
 409 * nothing if cl->prio_activity == 0. The class is removed from all feed
 410 * chains and rows.
 411 */
 412static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 413{
 414        struct htb_class *p = cl->parent;
 415        long m, mask = cl->prio_activity;
 416
 417        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 418                m = mask;
 419                mask = 0;
 420                while (m) {
 421                        int prio = ffz(~m);
 422                        m &= ~(1 << prio);
 423
 424                        if (p->un.inner.ptr[prio] == cl->node + prio) {
 425                                /* we are removing a child that the parent feed
 426                                   points to - forget the pointer but remember
 427                                   the classid */
 428                                p->un.inner.last_ptr_id[prio] = cl->common.classid;
 429                                p->un.inner.ptr[prio] = NULL;
 430                        }
 431
 432                        htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
 433
 434                        if (!p->un.inner.feed[prio].rb_node)
 435                                mask |= 1 << prio;
 436                }
 437
 438                p->prio_activity &= ~mask;
 439                cl = p;
 440                p = cl->parent;
 441
 442        }
 443        if (cl->cmode == HTB_CAN_SEND && mask)
 444                htb_remove_class_from_row(q, cl, mask);
 445}
 446
 447static inline long htb_lowater(const struct htb_class *cl)
 448{
 449        if (htb_hysteresis)
 450                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 451        else
 452                return 0;
 453}
 454static inline long htb_hiwater(const struct htb_class *cl)
 455{
 456        if (htb_hysteresis)
 457                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 458        else
 459                return 0;
 460}
 461
 462
 463/**
 464 * htb_class_mode - computes and returns current class mode
 465 *
 466 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 467 * is not HTB_CAN_SEND then *diff is updated to the time difference from
 468 * now to the time when cl will change its state.
 469 * It is also worth noting that the class mode doesn't change simply at
 470 * cl->{c,}tokens == 0; rather, there can be hysteresis over the
 471 * 0 .. -cl->{c,}buffer range, meant to limit the number of
 472 * mode transitions per time unit. The speed gain is about 1/6.
 473 */
 474static inline enum htb_cmode
 475htb_class_mode(struct htb_class *cl, long *diff)
 476{
 477        long toks;
 478
 479        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 480                *diff = -toks;
 481                return HTB_CANT_SEND;
 482        }
 483
 484        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 485                return HTB_CAN_SEND;
 486
 487        *diff = -toks;
 488        return HTB_MAY_BORROW;
 489}
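/* A worked example with illustrative numbers and hysteresis off (both
 * waters are then 0): if cl->ctokens + *diff == 100 the first test fails,
 * so the class is not HTB_CANT_SEND; if cl->tokens + *diff == -5 the
 * second test fails too, so the result is HTB_MAY_BORROW and *diff is set
 * to 5, the shortfall the caller turns into a wait before the next mode
 * change. With htb_hysteresis on, the thresholds shift to -cl->cbuffer
 * and -cl->buffer depending on the current mode. */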
 490
 491/**
 492 * htb_change_class_mode - changes class's mode
 493 *
 494 * This should be the only way to change a class's mode under normal
 495 * circumstances. The routine will update the feed list linkage, change the
 496 * mode and add the class to the wait event queue if appropriate. The new
 497 * mode should differ from the old one, and cl->pq_key has to be valid if
 498 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 499 */
 500static void
 501htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
 502{
 503        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 504
 505        if (new_mode == cl->cmode)
 506                return;
 507
 508        if (cl->prio_activity) {        /* not necessary: speed optimization */
 509                if (cl->cmode != HTB_CANT_SEND)
 510                        htb_deactivate_prios(q, cl);
 511                cl->cmode = new_mode;
 512                if (new_mode != HTB_CANT_SEND)
 513                        htb_activate_prios(q, cl);
 514        } else
 515                cl->cmode = new_mode;
 516}
 517
 518/**
 519 * htb_activate - inserts leaf cl into appropriate active feeds
 520 *
 521 * The routine learns the (new) priority of the leaf and activates the feed
 522 * chain for that prio. It can safely be called on an already active leaf.
 523 * It also adds the leaf to the drop list.
 524 */
 525static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 526{
 527        WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 528
 529        if (!cl->prio_activity) {
 530                cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
 531                htb_activate_prios(q, cl);
 532                list_add_tail(&cl->un.leaf.drop_list,
 533                              q->drops + cl->un.leaf.aprio);
 534        }
 535}
 536
 537/**
 538 * htb_deactivate - remove leaf cl from active feeds
 539 *
 540 * Make sure that the leaf is active. In other words, it can't be called
 541 * with a non-active leaf. It also removes the class from the drop list.
 542 */
 543static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 544{
 545        WARN_ON(!cl->prio_activity);
 546
 547        htb_deactivate_prios(q, cl);
 548        cl->prio_activity = 0;
 549        list_del_init(&cl->un.leaf.drop_list);
 550}
 551
 552static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 553{
 554        int ret;
 555        struct htb_sched *q = qdisc_priv(sch);
 556        struct htb_class *cl = htb_classify(skb, sch, &ret);
 557
 558        if (cl == HTB_DIRECT) {
 559                /* enqueue to helper queue */
 560                if (q->direct_queue.qlen < q->direct_qlen) {
 561                        __skb_queue_tail(&q->direct_queue, skb);
 562                        q->direct_pkts++;
 563                } else {
 564                        kfree_skb(skb);
 565                        sch->qstats.drops++;
 566                        return NET_XMIT_DROP;
 567                }
 568#ifdef CONFIG_NET_CLS_ACT
 569        } else if (!cl) {
 570                if (ret & __NET_XMIT_BYPASS)
 571                        sch->qstats.drops++;
 572                kfree_skb(skb);
 573                return ret;
 574#endif
 575        } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
 576                if (net_xmit_drop_count(ret)) {
 577                        sch->qstats.drops++;
 578                        cl->qstats.drops++;
 579                }
 580                return ret;
 581        } else {
 582                cl->bstats.packets +=
 583                        skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 584                cl->bstats.bytes += qdisc_pkt_len(skb);
 585                htb_activate(q, cl);
 586        }
 587
 588        sch->q.qlen++;
 589        sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 590        sch->bstats.bytes += qdisc_pkt_len(skb);
 591        return NET_XMIT_SUCCESS;
 592}
 593
 594/* TODO: requeuing a packet charges it to the policers again !! */
 595static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 596{
 597        int ret;
 598        struct htb_sched *q = qdisc_priv(sch);
 599        struct htb_class *cl = htb_classify(skb, sch, &ret);
 600        struct sk_buff *tskb;
 601
 602        if (cl == HTB_DIRECT) {
 603                /* enqueue to helper queue */
 604                if (q->direct_queue.qlen < q->direct_qlen) {
 605                        __skb_queue_head(&q->direct_queue, skb);
 606                } else {
 607                        __skb_queue_head(&q->direct_queue, skb);
 608                        tskb = __skb_dequeue_tail(&q->direct_queue);
 609                        kfree_skb(tskb);
 610                        sch->qstats.drops++;
 611                        return NET_XMIT_CN;
 612                }
 613#ifdef CONFIG_NET_CLS_ACT
 614        } else if (!cl) {
 615                if (ret & __NET_XMIT_BYPASS)
 616                        sch->qstats.drops++;
 617                kfree_skb(skb);
 618                return ret;
 619#endif
 620        } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
 621                   NET_XMIT_SUCCESS) {
 622                if (net_xmit_drop_count(ret)) {
 623                        sch->qstats.drops++;
 624                        cl->qstats.drops++;
 625                }
 626                return ret;
 627        } else
 628                htb_activate(q, cl);
 629
 630        sch->q.qlen++;
 631        sch->qstats.requeues++;
 632        return NET_XMIT_SUCCESS;
 633}
 634
 635/**
 636 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 637 *
 638 * The routine assumes that a packet "bytes" long was dequeued from leaf cl
 639 * borrowing from "level". It accounts bytes to the ceil leaky bucket for
 640 * the leaf and all ancestors, and to the rate bucket for ancestors at levels
 641 * "level" and higher. It also handles a possible change of mode resulting
 642 * from the update. Note that the mode can also increase here (MAY_BORROW to
 643 * CAN_SEND) because we can use a more precise clock than the event queue.
 644 * In such a case we remove the class from the event queue first.
 645 */
 646static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 647                             int level, struct sk_buff *skb)
 648{
 649        int bytes = qdisc_pkt_len(skb);
 650        long toks, diff;
 651        enum htb_cmode old_mode;
 652
 653#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
 654        if (toks > cl->B) toks = cl->B; \
 655        toks -= L2T(cl, cl->R, bytes); \
 656        if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
 657        cl->T = toks
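        /* Roughly, HTB_ACCNT(tokens, buffer, rate) is a bounded token
           bucket update:
                toks = min(diff + cl->tokens, cl->buffer);
                toks -= L2T(cl, cl->rate, bytes);
                cl->tokens = toks, clamped to 1 - cl->mbuffer from below;
           i.e. refill by elapsed time, cap at the bucket depth, then
           spend the transmission time of the packet. */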
 658
 659        while (cl) {
 660                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 661                if (cl->level >= level) {
 662                        if (cl->level == level)
 663                                cl->xstats.lends++;
 664                        HTB_ACCNT(tokens, buffer, rate);
 665                } else {
 666                        cl->xstats.borrows++;
 667                        cl->tokens += diff;     /* we moved t_c; update tokens */
 668                }
 669                HTB_ACCNT(ctokens, cbuffer, ceil);
 670                cl->t_c = q->now;
 671
 672                old_mode = cl->cmode;
 673                diff = 0;
 674                htb_change_class_mode(q, cl, &diff);
 675                if (old_mode != cl->cmode) {
 676                        if (old_mode != HTB_CAN_SEND)
 677                                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 678                        if (cl->cmode != HTB_CAN_SEND)
 679                                htb_add_to_wait_tree(q, cl, diff);
 680                }
 681
 682                /* update byte stats except for leaves which are already updated */
 683                if (cl->level) {
 684                        cl->bstats.bytes += bytes;
 685                        cl->bstats.packets += skb_is_gso(skb)?
 686                                        skb_shinfo(skb)->gso_segs:1;
 687                }
 688                cl = cl->parent;
 689        }
 690}
 691
 692/**
 693 * htb_do_events - make mode changes to classes at the level
 694 *
 695 * Scans the event queue for pending events and applies them. Returns the
 696 * time of the next pending event (0 if there is no event in the pq).
 697 * Note: only events with cl->pq_key <= q->now are applied.
 698 */
 699static psched_time_t htb_do_events(struct htb_sched *q, int level)
 700{
 701        /* don't run for longer than 2 jiffies; 2 is used instead of
 702           1 to simplify things when the jiffy counter is about to be
 703           incremented too soon */
 704        unsigned long stop_at = jiffies + 2;
 705        while (time_before(jiffies, stop_at)) {
 706                struct htb_class *cl;
 707                long diff;
 708                struct rb_node *p = rb_first(&q->wait_pq[level]);
 709
 710                if (!p)
 711                        return 0;
 712
 713                cl = rb_entry(p, struct htb_class, pq_node);
 714                if (cl->pq_key > q->now)
 715                        return cl->pq_key;
 716
 717                htb_safe_rb_erase(p, q->wait_pq + level);
 718                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 719                htb_change_class_mode(q, cl, &diff);
 720                if (cl->cmode != HTB_CAN_SEND)
 721                        htb_add_to_wait_tree(q, cl, diff);
 722        }
 723        /* too much load - let's continue on the next jiffy */
 724        return q->now + PSCHED_TICKS_PER_SEC / HZ;
 725}
 726
 727/* Returns class->node+prio from the id-tree with the smallest classid >= id,
 728   or NULL if no such node exists. */
 729static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 730                                              u32 id)
 731{
 732        struct rb_node *r = NULL;
 733        while (n) {
 734                struct htb_class *cl =
 735                    rb_entry(n, struct htb_class, node[prio]);
 736                if (id == cl->common.classid)
 737                        return n;
 738
 739                if (id > cl->common.classid) {
 740                        n = n->rb_right;
 741                } else {
 742                        r = n;
 743                        n = n->rb_left;
 744                }
 745        }
 746        return r;
 747}
 748
 749/**
 750 * htb_lookup_leaf - returns next leaf class in DRR order
 751 *
 752 * Find the leaf that the current feed pointer points to.
 753 */
 754static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 755                                         struct rb_node **pptr, u32 * pid)
 756{
 757        int i;
 758        struct {
 759                struct rb_node *root;
 760                struct rb_node **pptr;
 761                u32 *pid;
 762        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 763
 764        WARN_ON(!tree->rb_node);
 765        sp->root = tree->rb_node;
 766        sp->pptr = pptr;
 767        sp->pid = pid;
 768
 769        for (i = 0; i < 65535; i++) {
 770                if (!*sp->pptr && *sp->pid) {
 771                        /* ptr was invalidated but id is valid - try to recover
 772                           the original or next ptr */
 773                        *sp->pptr =
 774                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 775                }
 776                *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
 777                                   can become out of date quickly */
 778                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 779                        *sp->pptr = sp->root;
 780                        while ((*sp->pptr)->rb_left)
 781                                *sp->pptr = (*sp->pptr)->rb_left;
 782                        if (sp > stk) {
 783                                sp--;
 784                                WARN_ON(!*sp->pptr);
 785                                if (!*sp->pptr)
 786                                        return NULL;
 787                                htb_next_rb_node(sp->pptr);
 788                        }
 789                } else {
 790                        struct htb_class *cl;
 791                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 792                        if (!cl->level)
 793                                return cl;
 794                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
 795                        sp->pptr = cl->un.inner.ptr + prio;
 796                        sp->pid = cl->un.inner.last_ptr_id + prio;
 797                }
 798        }
 799        WARN_ON(1);
 800        return NULL;
 801}
 802
 803/* dequeues a packet at the given priority and level; call only if
 804   you are sure that there is an active class at prio/level */
 805static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 806                                        int level)
 807{
 808        struct sk_buff *skb = NULL;
 809        struct htb_class *cl, *start;
 810        /* look initial class up in the row */
 811        start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
 812                                     q->ptr[level] + prio,
 813                                     q->last_ptr_id[level] + prio);
 814
 815        do {
 816next:
 817                WARN_ON(!cl);
 818                if (!cl)
 819                        return NULL;
 820
 821                /* the class can be empty - it is unlikely but can happen if the
 822                   leaf qdisc drops packets in its enqueue routine or if someone
 823                   grafted the leaf since the last dequeue;
 824                   simply deactivate and skip such a class */
 825                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 826                        struct htb_class *next;
 827                        htb_deactivate(q, cl);
 828
 829                        /* row/level might become empty */
 830                        if ((q->row_mask[level] & (1 << prio)) == 0)
 831                                return NULL;
 832
 833                        next = htb_lookup_leaf(q->row[level] + prio,
 834                                               prio, q->ptr[level] + prio,
 835                                               q->last_ptr_id[level] + prio);
 836
 837                        if (cl == start)        /* fix start if we just deleted it */
 838                                start = next;
 839                        cl = next;
 840                        goto next;
 841                }
 842
 843                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
 844                if (likely(skb != NULL))
 845                        break;
 846                if (!cl->warned) {
 847                        printk(KERN_WARNING
 848                               "htb: class %X isn't work conserving ?!\n",
 849                               cl->common.classid);
 850                        cl->warned = 1;
 851                }
 852                q->nwc_hit++;
 853                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 854                                  ptr[0]) + prio);
 855                cl = htb_lookup_leaf(q->row[level] + prio, prio,
 856                                     q->ptr[level] + prio,
 857                                     q->last_ptr_id[level] + prio);
 858
 859        } while (cl != start);
 860
 861        if (likely(skb != NULL)) {
 862                cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
 863                if (cl->un.leaf.deficit[level] < 0) {
 864                        cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
 865                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 866                                          ptr[0]) + prio);
 867                }
 868                /* this used to be after charge_class but this constellation
 869                   gives us slightly better performance */
 870                if (!cl->un.leaf.q->q.qlen)
 871                        htb_deactivate(q, cl);
 872                htb_charge_class(q, cl, level, skb);
 873        }
 874        return skb;
 875}
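/* DRR sketch for the deficit handling above (illustrative numbers): a
   leaf with quantum 1000 and deficit[level] 200 that dequeues a 300-byte
   skb goes to -100, is recharged by one quantum to 900, and the feed
   pointer advances to the next class; while the deficit stays
   non-negative the same leaf keeps the pointer. */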
 876
 877static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 878{
 879        struct sk_buff *skb = NULL;
 880        struct htb_sched *q = qdisc_priv(sch);
 881        int level;
 882        psched_time_t next_event;
 883
 884        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 885        skb = __skb_dequeue(&q->direct_queue);
 886        if (skb != NULL) {
 887                sch->flags &= ~TCQ_F_THROTTLED;
 888                sch->q.qlen--;
 889                return skb;
 890        }
 891
 892        if (!sch->q.qlen)
 893                goto fin;
 894        q->now = psched_get_time();
 895
 896        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 897        q->nwc_hit = 0;
 898        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 899                /* common case optimization - skip event handler quickly */
 900                int m;
 901                psched_time_t event;
 902
 903                if (q->now >= q->near_ev_cache[level]) {
 904                        event = htb_do_events(q, level);
 905                        if (!event)
 906                                event = q->now + PSCHED_TICKS_PER_SEC;
 907                        q->near_ev_cache[level] = event;
 908                } else
 909                        event = q->near_ev_cache[level];
 910
 911                if (event && next_event > event)
 912                        next_event = event;
 913
 914                m = ~q->row_mask[level];
 915                while (m != (int)(-1)) {
 916                        int prio = ffz(m);
 917                        m |= 1 << prio;
 918                        skb = htb_dequeue_tree(q, prio, level);
 919                        if (likely(skb != NULL)) {
 920                                sch->q.qlen--;
 921                                sch->flags &= ~TCQ_F_THROTTLED;
 922                                goto fin;
 923                        }
 924                }
 925        }
 926        sch->qstats.overlimits++;
 927        qdisc_watchdog_schedule(&q->watchdog, next_event);
 928fin:
 929        return skb;
 930}
 931
 932/* try to drop from each class (by prio) until one succeeds */
 933static unsigned int htb_drop(struct Qdisc *sch)
 934{
 935        struct htb_sched *q = qdisc_priv(sch);
 936        int prio;
 937
 938        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 939                struct list_head *p;
 940                list_for_each(p, q->drops + prio) {
 941                        struct htb_class *cl = list_entry(p, struct htb_class,
 942                                                          un.leaf.drop_list);
 943                        unsigned int len;
 944                        if (cl->un.leaf.q->ops->drop &&
 945                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 946                                sch->q.qlen--;
 947                                if (!cl->un.leaf.q->q.qlen)
 948                                        htb_deactivate(q, cl);
 949                                return len;
 950                        }
 951                }
 952        }
 953        return 0;
 954}
 955
 956/* reset all classes */
 957/* always called under BH & queue lock */
 958static void htb_reset(struct Qdisc *sch)
 959{
 960        struct htb_sched *q = qdisc_priv(sch);
 961        struct htb_class *cl;
 962        struct hlist_node *n;
 963        unsigned int i;
 964
 965        for (i = 0; i < q->clhash.hashsize; i++) {
 966                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
 967                        if (cl->level)
 968                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 969                        else {
 970                                if (cl->un.leaf.q)
 971                                        qdisc_reset(cl->un.leaf.q);
 972                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 973                        }
 974                        cl->prio_activity = 0;
 975                        cl->cmode = HTB_CAN_SEND;
 976
 977                }
 978        }
 979        qdisc_watchdog_cancel(&q->watchdog);
 980        __skb_queue_purge(&q->direct_queue);
 981        sch->q.qlen = 0;
 982        memset(q->row, 0, sizeof(q->row));
 983        memset(q->row_mask, 0, sizeof(q->row_mask));
 984        memset(q->wait_pq, 0, sizeof(q->wait_pq));
 985        memset(q->ptr, 0, sizeof(q->ptr));
 986        for (i = 0; i < TC_HTB_NUMPRIO; i++)
 987                INIT_LIST_HEAD(q->drops + i);
 988}
 989
 990static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 991        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
 992        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
 993        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 994        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 995};
 996
 997static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 998{
 999        struct htb_sched *q = qdisc_priv(sch);
1000        struct nlattr *tb[TCA_HTB_INIT + 1];
1001        struct tc_htb_glob *gopt;
1002        int err;
1003        int i;
1004
1005        if (!opt)
1006                return -EINVAL;
1007
1008        err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1009        if (err < 0)
1010                return err;
1011
1012        if (tb[TCA_HTB_INIT] == NULL) {
1013                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1014                return -EINVAL;
1015        }
1016        gopt = nla_data(tb[TCA_HTB_INIT]);
1017        if (gopt->version != HTB_VER >> 16) {
1018                printk(KERN_ERR
1019                       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1020                       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1021                return -EINVAL;
1022        }
1023
1024        err = qdisc_class_hash_init(&q->clhash);
1025        if (err < 0)
1026                return err;
1027        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1028                INIT_LIST_HEAD(q->drops + i);
1029
1030        qdisc_watchdog_init(&q->watchdog, sch);
1031        skb_queue_head_init(&q->direct_queue);
1032
1033        q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1034        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1035                q->direct_qlen = 2;
1036
1037        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1038                q->rate2quantum = 1;
1039        q->defcls = gopt->defcls;
1040
1041        return 0;
1042}
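/* A typical command feeding this init path (illustrative; tc parses the
 * default minor as hex, so "default 20" becomes defcls 0x20):
 *
 *   tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *
 * tc packs version, defcls and rate2quantum into tc_htb_glob (TCA_HTB_INIT). */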
1043
1044static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1045{
1046        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1047        struct htb_sched *q = qdisc_priv(sch);
1048        struct nlattr *nest;
1049        struct tc_htb_glob gopt;
1050
1051        spin_lock_bh(root_lock);
1052
1053        gopt.direct_pkts = q->direct_pkts;
1054        gopt.version = HTB_VER;
1055        gopt.rate2quantum = q->rate2quantum;
1056        gopt.defcls = q->defcls;
1057        gopt.debug = 0;
1058
1059        nest = nla_nest_start(skb, TCA_OPTIONS);
1060        if (nest == NULL)
1061                goto nla_put_failure;
1062        NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1063        nla_nest_end(skb, nest);
1064
1065        spin_unlock_bh(root_lock);
1066        return skb->len;
1067
1068nla_put_failure:
1069        spin_unlock_bh(root_lock);
1070        nla_nest_cancel(skb, nest);
1071        return -1;
1072}
1073
1074static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1075                          struct sk_buff *skb, struct tcmsg *tcm)
1076{
1077        struct htb_class *cl = (struct htb_class *)arg;
1078        spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1079        struct nlattr *nest;
1080        struct tc_htb_opt opt;
1081
1082        spin_lock_bh(root_lock);
1083        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1084        tcm->tcm_handle = cl->common.classid;
1085        if (!cl->level && cl->un.leaf.q)
1086                tcm->tcm_info = cl->un.leaf.q->handle;
1087
1088        nest = nla_nest_start(skb, TCA_OPTIONS);
1089        if (nest == NULL)
1090                goto nla_put_failure;
1091
1092        memset(&opt, 0, sizeof(opt));
1093
1094        opt.rate = cl->rate->rate;
1095        opt.buffer = cl->buffer;
1096        opt.ceil = cl->ceil->rate;
1097        opt.cbuffer = cl->cbuffer;
1098        opt.quantum = cl->un.leaf.quantum;
1099        opt.prio = cl->un.leaf.prio;
1100        opt.level = cl->level;
1101        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1102
1103        nla_nest_end(skb, nest);
1104        spin_unlock_bh(root_lock);
1105        return skb->len;
1106
1107nla_put_failure:
1108        spin_unlock_bh(root_lock);
1109        nla_nest_cancel(skb, nest);
1110        return -1;
1111}
1112
1113static int
1114htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1115{
1116        struct htb_class *cl = (struct htb_class *)arg;
1117
1118        if (!cl->level && cl->un.leaf.q)
1119                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1120        cl->xstats.tokens = cl->tokens;
1121        cl->xstats.ctokens = cl->ctokens;
1122
1123        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1124            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1125            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1126                return -1;
1127
1128        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1129}
1130
1131static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1132                     struct Qdisc **old)
1133{
1134        struct htb_class *cl = (struct htb_class *)arg;
1135
1136        if (cl && !cl->level) {
1137                if (new == NULL &&
1138                    (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1139                                             &pfifo_qdisc_ops,
1140                                             cl->common.classid))
1141                    == NULL)
1142                        return -ENOBUFS;
1143                sch_tree_lock(sch);
1144                if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
1145                        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1146                        qdisc_reset(*old);
1147                }
1148                sch_tree_unlock(sch);
1149                return 0;
1150        }
1151        return -ENOENT;
1152}
1153
1154static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1155{
1156        struct htb_class *cl = (struct htb_class *)arg;
1157        return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1158}
1159
1160static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1161{
1162        struct htb_class *cl = (struct htb_class *)arg;
1163
1164        if (cl->un.leaf.q->q.qlen == 0)
1165                htb_deactivate(qdisc_priv(sch), cl);
1166}
1167
1168static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1169{
1170        struct htb_class *cl = htb_find(classid, sch);
1171        if (cl)
1172                cl->refcnt++;
1173        return (unsigned long)cl;
1174}
1175
1176static inline int htb_parent_last_child(struct htb_class *cl)
1177{
1178        if (!cl->parent)
1179                /* the root class */
1180                return 0;
1181        if (cl->parent->children > 1)
1182                /* not the last child */
1183                return 0;
1184        return 1;
1185}
1186
1187static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1188                               struct Qdisc *new_q)
1189{
1190        struct htb_class *parent = cl->parent;
1191
1192        WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
1193
1194        if (parent->cmode != HTB_CAN_SEND)
1195                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1196
1197        parent->level = 0;
1198        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1199        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1200        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1201        parent->un.leaf.quantum = parent->quantum;
1202        parent->un.leaf.prio = parent->prio;
1203        parent->tokens = parent->buffer;
1204        parent->ctokens = parent->cbuffer;
1205        parent->t_c = psched_get_time();
1206        parent->cmode = HTB_CAN_SEND;
1207}
1208
1209static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1210{
1211        if (!cl->level) {
1212                WARN_ON(!cl->un.leaf.q);
1213                qdisc_destroy(cl->un.leaf.q);
1214        }
1215        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1216        qdisc_put_rtab(cl->rate);
1217        qdisc_put_rtab(cl->ceil);
1218
1219        tcf_destroy_chain(&cl->filter_list);
1220        kfree(cl);
1221}
1222
1223/* always called under BH & queue lock */
1224static void htb_destroy(struct Qdisc *sch)
1225{
1226        struct htb_sched *q = qdisc_priv(sch);
1227        struct hlist_node *n, *next;
1228        struct htb_class *cl;
1229        unsigned int i;
1230
1231        qdisc_watchdog_cancel(&q->watchdog);
1232        /* This line used to be after the htb_destroy_class call below,
1233           and surprisingly it worked in 2.4. But it must precede it
1234           because a filter needs its target class alive to be able to
1235           call unbind_filter on it (without an Oops). */
1236        tcf_destroy_chain(&q->filter_list);
1237
1238        for (i = 0; i < q->clhash.hashsize; i++) {
1239                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1240                        tcf_destroy_chain(&cl->filter_list);
1241        }
1242        for (i = 0; i < q->clhash.hashsize; i++) {
1243                hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1244                                          common.hnode)
1245                        htb_destroy_class(sch, cl);
1246        }
1247        qdisc_class_hash_destroy(&q->clhash);
1248        __skb_queue_purge(&q->direct_queue);
1249}
1250
1251static int htb_delete(struct Qdisc *sch, unsigned long arg)
1252{
1253        struct htb_sched *q = qdisc_priv(sch);
1254        struct htb_class *cl = (struct htb_class *)arg;
1255        unsigned int qlen;
1256        struct Qdisc *new_q = NULL;
1257        int last_child = 0;
1258
1259        // TODO: why don't we allow deleting a subtree? references? does
1260        // the tc subsystem guarantee us that in htb_destroy it holds no
1261        // class refs, so that we can remove children safely there?
1262        if (cl->children || cl->filter_cnt)
1263                return -EBUSY;
1264
1265        if (!cl->level && htb_parent_last_child(cl)) {
1266                new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1267                                          &pfifo_qdisc_ops,
1268                                          cl->parent->common.classid);
1269                last_child = 1;
1270        }
1271
1272        sch_tree_lock(sch);
1273
1274        if (!cl->level) {
1275                qlen = cl->un.leaf.q->q.qlen;
1276                qdisc_reset(cl->un.leaf.q);
1277                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1278        }
1279
1280        /* delete from hash and active; remainder in destroy_class */
1281        qdisc_class_hash_remove(&q->clhash, &cl->common);
1282        if (cl->parent)
1283                cl->parent->children--;
1284
1285        if (cl->prio_activity)
1286                htb_deactivate(q, cl);
1287
1288        if (cl->cmode != HTB_CAN_SEND)
1289                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1290
1291        if (last_child)
1292                htb_parent_to_leaf(q, cl, new_q);
1293
1294        if (--cl->refcnt == 0)
1295                htb_destroy_class(sch, cl);
1296
1297        sch_tree_unlock(sch);
1298        return 0;
1299}
1300
1301static void htb_put(struct Qdisc *sch, unsigned long arg)
1302{
1303        struct htb_class *cl = (struct htb_class *)arg;
1304
1305        if (--cl->refcnt == 0)
1306                htb_destroy_class(sch, cl);
1307}
1308
1309static int htb_change_class(struct Qdisc *sch, u32 classid,
1310                            u32 parentid, struct nlattr **tca,
1311                            unsigned long *arg)
1312{
1313        int err = -EINVAL;
1314        struct htb_sched *q = qdisc_priv(sch);
1315        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1316        struct nlattr *opt = tca[TCA_OPTIONS];
1317        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1318        struct nlattr *tb[TCA_HTB_RTAB + 1];
1319        struct tc_htb_opt *hopt;
1320
1321        /* extract all subattrs from opt attr */
1322        if (!opt)
1323                goto failure;
1324
1325        err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1326        if (err < 0)
1327                goto failure;
1328
1329        err = -EINVAL;
1330        if (tb[TCA_HTB_PARMS] == NULL)
1331                goto failure;
1332
1333        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1334
1335        hopt = nla_data(tb[TCA_HTB_PARMS]);
1336
1337        rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1338        ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1339        if (!rtab || !ctab)
1340                goto failure;
1341
1342        if (!cl) {              /* new class */
1343                struct Qdisc *new_q;
1344                int prio;
1345                struct {
1346                        struct nlattr           nla;
1347                        struct gnet_estimator   opt;
1348                } est = {
1349                        .nla = {
1350                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1351                                .nla_type       = TCA_RATE,
1352                        },
1353                        .opt = {
1354                                /* 4s interval, 16s averaging constant */
1355                                .interval       = 2,
1356                                .ewma_log       = 2,
1357                        },
1358                };
1359
1360                /* check for valid classid */
1361                if (!classid || TC_H_MAJ(classid ^ sch->handle)
1362                    || htb_find(classid, sch))
1363                        goto failure;
1364
1365                /* check maximal depth */
1366                if (parent && parent->parent && parent->parent->level < 2) {
1367                        printk(KERN_ERR "htb: tree is too deep\n");
1368                        goto failure;
1369                }
1370                err = -ENOBUFS;
1371                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1372                        goto failure;
1373
1374                gen_new_estimator(&cl->bstats, &cl->rate_est,
1375                                  qdisc_root_sleeping_lock(sch),
1376                                  tca[TCA_RATE] ? : &est.nla);
1377                cl->refcnt = 1;
1378                cl->children = 0;
1379                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1380                RB_CLEAR_NODE(&cl->pq_node);
1381
1382                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1383                        RB_CLEAR_NODE(&cl->node[prio]);
1384
1385                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1386                   which can't be used inside sch_tree_lock
1387                   -- thanks to Karlis Peisenieks */
1388                new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1389                                          &pfifo_qdisc_ops, classid);
1390                sch_tree_lock(sch);
1391                if (parent && !parent->level) {
1392                        unsigned int qlen = parent->un.leaf.q->q.qlen;
1393
1394                        /* turn parent into inner node */
1395                        qdisc_reset(parent->un.leaf.q);
1396                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1397                        qdisc_destroy(parent->un.leaf.q);
1398                        if (parent->prio_activity)
1399                                htb_deactivate(q, parent);
1400
1401                        /* remove from evt list because of level change */
1402                        if (parent->cmode != HTB_CAN_SEND) {
1403                                htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1404                                parent->cmode = HTB_CAN_SEND;
1405                        }
1406                        parent->level = (parent->parent ? parent->parent->level
1407                                         : TC_HTB_MAXDEPTH) - 1;
1408                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1409                }
1410                /* the new leaf (this class) needs an elementary qdisc */
1411                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1412
1413                cl->common.classid = classid;
1414                cl->parent = parent;
1415
1416                /* set class to be in HTB_CAN_SEND state */
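                /* the token buckets start out full, and mbuffer bounds how
                   much idle time may later be converted back into tokens */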
1417                cl->tokens = hopt->buffer;
1418                cl->ctokens = hopt->cbuffer;
1419                cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;        /* 1min */
1420                cl->t_c = psched_get_time();
1421                cl->cmode = HTB_CAN_SEND;
1422
1423                /* attach to the hash list and parent's family */
1424                qdisc_class_hash_insert(&q->clhash, &cl->common);
1425                if (parent)
1426                        parent->children++;
1427        } else {
1428                if (tca[TCA_RATE])
1429                        gen_replace_estimator(&cl->bstats, &cl->rate_est,
1430                                              qdisc_root_sleeping_lock(sch),
1431                                              tca[TCA_RATE]);
1432                sch_tree_lock(sch);
1433        }
1434
1435        /* there used to be a nasty bug here: we must check that the node
1436           really is a leaf before touching cl->un.leaf! */
1437        if (!cl->level) {
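                /* quantum is the DRR-style byte quota a leaf may dequeue per
                   round at its priority level; it defaults to rate / r2q and
                   is sanity-clamped below unless given explicitly */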
1438                cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
1439                if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
1440                        printk(KERN_WARNING
1441                               "HTB: quantum of class %X is small. Consider r2q change.\n",
1442                               cl->common.classid);
1443                        cl->un.leaf.quantum = 1000;
1444                }
1445                if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
1446                        printk(KERN_WARNING
1447                               "HTB: quantum of class %X is large. Consider r2q change.\n",
1448                               cl->common.classid);
1449                        cl->un.leaf.quantum = 200000;
1450                }
1451                if (hopt->quantum)
1452                        cl->un.leaf.quantum = hopt->quantum;
1453                if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
1454                        cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
1455
1456                /* backup for htb_parent_to_leaf */
1457                cl->quantum = cl->un.leaf.quantum;
1458                cl->prio = cl->un.leaf.prio;
1459        }
1460
1461        cl->buffer = hopt->buffer;
1462        cl->cbuffer = hopt->cbuffer;
1463        if (cl->rate)
1464                qdisc_put_rtab(cl->rate);
1465        cl->rate = rtab;
1466        if (cl->ceil)
1467                qdisc_put_rtab(cl->ceil);
1468        cl->ceil = ctab;
1469        sch_tree_unlock(sch);
1470
1471        qdisc_class_hash_grow(sch, &q->clhash);
1472
1473        *arg = (unsigned long)cl;
1474        return 0;
1475
1476failure:
1477        if (rtab)
1478                qdisc_put_rtab(rtab);
1479        if (ctab)
1480                qdisc_put_rtab(ctab);
1481        return err;
1482}
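/*
 * For reference, a setup like the following (device name and handles are
 * hypothetical) ends up in htb_change_class() above:
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1: classid 1:10 htb \
 *		rate 1mbit ceil 2mbit burst 15k prio 1
 *
 * rate/ceil/burst/prio arrive in TCA_HTB_PARMS, while the rate tables
 * precomputed by tc arrive as TCA_HTB_RTAB and TCA_HTB_CTAB.
 */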
1483
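/* return the anchor of the filter list to operate on: the per-class list
 * when a class is given, the qdisc-wide list otherwise */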
1484static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1485{
1486        struct htb_sched *q = qdisc_priv(sch);
1487        struct htb_class *cl = (struct htb_class *)arg;
1488        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1489
1490        return fl;
1491}
1492
1493static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1494                                     u32 classid)
1495{
1496        struct htb_class *cl = htb_find(classid, sch);
1497
1498        /*if (cl && !cl->level) return 0;
1499           The line above used to be here to prevent attaching filters to
1500           leaves. But at least the tc_index filter uses this just to look
1501           up the class for other reasons, so we have to allow it.
1502           ----
1503           19.6.2002 As Werner explained, it is OK - binding a filter is
1504           just another way to "lock" the class; unlike "get", this lock
1505           can be broken by the class during destroy, IIUC.
1506         */
1507        if (cl)
1508                cl->filter_cnt++;
1509        return (unsigned long)cl;
1510}
1511
1512static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1513{
1514        struct htb_class *cl = (struct htb_class *)arg;
1515
1516        if (cl)
1517                cl->filter_cnt--;
1518}
1519
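/*
 * Enumerate every class, e.g. for "tc class show".  The walker protocol:
 * entries below arg->skip are passed over, arg->count tracks progress so
 * that a later call can resume, and a negative return from arg->fn aborts
 * the walk.  A minimal callback sketch (hypothetical, for illustration
 * only) that visits each class and never aborts:
 *
 *	static int visit_class(struct Qdisc *sch, unsigned long cl,
 *			       struct qdisc_walker *w)
 *	{
 *		return 0;
 *	}
 */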
1520static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1521{
1522        struct htb_sched *q = qdisc_priv(sch);
1523        struct htb_class *cl;
1524        struct hlist_node *n;
1525        unsigned int i;
1526
1527        if (arg->stop)
1528                return;
1529
1530        for (i = 0; i < q->clhash.hashsize; i++) {
1531                hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
1532                        if (arg->count < arg->skip) {
1533                                arg->count++;
1534                                continue;
1535                        }
1536                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1537                                arg->stop = 1;
1538                                return;
1539                        }
1540                        arg->count++;
1541                }
1542        }
1543}
1544
1545static const struct Qdisc_class_ops htb_class_ops = {
1546        .graft          =       htb_graft,
1547        .leaf           =       htb_leaf,
1548        .qlen_notify    =       htb_qlen_notify,
1549        .get            =       htb_get,
1550        .put            =       htb_put,
1551        .change         =       htb_change_class,
1552        .delete         =       htb_delete,
1553        .walk           =       htb_walk,
1554        .tcf_chain      =       htb_find_tcf,
1555        .bind_tcf       =       htb_bind_filter,
1556        .unbind_tcf     =       htb_unbind_filter,
1557        .dump           =       htb_dump_class,
1558        .dump_stats     =       htb_dump_class_stats,
1559};
1560
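/* qdisc-level operations; .change is left NULL (a placeholder for the
 * unimplemented htb_change), so in this version qdisc parameters such as
 * r2q and the default class cannot be altered once the qdisc exists */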
1561static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1562        .next           =       NULL,
1563        .cl_ops         =       &htb_class_ops,
1564        .id             =       "htb",
1565        .priv_size      =       sizeof(struct htb_sched),
1566        .enqueue        =       htb_enqueue,
1567        .dequeue        =       htb_dequeue,
1568        .requeue        =       htb_requeue,
1569        .drop           =       htb_drop,
1570        .init           =       htb_init,
1571        .reset          =       htb_reset,
1572        .destroy        =       htb_destroy,
1573        .change         =       NULL /* htb_change */,
1574        .dump           =       htb_dump,
1575        .owner          =       THIS_MODULE,
1576};
1577
1578static int __init htb_module_init(void)
1579{
1580        return register_qdisc(&htb_qdisc_ops);
1581}
1582static void __exit htb_module_exit(void)
1583{
1584        unregister_qdisc(&htb_qdisc_ops);
1585}
1586
1587module_init(htb_module_init)
1588module_exit(htb_module_exit)
1589MODULE_LICENSE("GPL");
1590