linux/net/sched/sch_htb.c
<<
>>
Prefs
   1/*
   2 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Martin Devera, <devik@cdi.cz>
  10 *
  11 * Credits (in time order) for older HTB versions:
  12 *              Stef Coene <stef.coene@docum.org>
  13 *                      HTB support at LARTC mailing list
  14 *              Ondrej Kraus, <krauso@barr.cz>
  15 *                      found missing INIT_QDISC(htb)
  16 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  17 *                      helped a lot to locate nasty class stall bug
  18 *              Andi Kleen, Jamal Hadi, Bert Hubert
  19 *                      code review and helpful comments on shaping
  20 *              Tomasz Wrona, <tw@eter.tym.pl>
  21 *                      created test case so that I was able to fix nasty bug
  22 *              Wilfried Weissmann
  23 *                      spotted bug in dequeue code and helped with fix
  24 *              Jiri Fojtasek
  25 *                      fixed requeue routine
  26 *              and many others. thanks.
  27 *
  28 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
  29 */
  30#include <linux/module.h>
  31#include <linux/types.h>
  32#include <linux/kernel.h>
  33#include <linux/string.h>
  34#include <linux/errno.h>
  35#include <linux/skbuff.h>
  36#include <linux/list.h>
  37#include <linux/compiler.h>
  38#include <linux/rbtree.h>
  39#include <net/netlink.h>
  40#include <net/pkt_sched.h>
  41
  42/* HTB algorithm.
  43    Author: devik@cdi.cz
  44    ========================================================================
  45    HTB is like TBF with multiple classes. It is also similar to CBQ because
  46    it allows to assign priority to each class in hierarchy.
  47    In fact it is another implementation of Floyd's formal sharing.
  48
  49    Levels:
  50    Each class is assigned level. Leaf has ALWAYS level 0 and root
  51    classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
  52    one less than their parent.
  53*/
  54
  55#define HTB_HSIZE 16            /* classid hash size */
  56#define HTB_HYSTERESIS 1        /* whether to use mode hysteresis for speedup */
  57#define HTB_VER 0x30011         /* major must be matched with number suplied by TC as version */
  58
  59#if HTB_VER >> 16 != TC_HTB_PROTOVER
  60#error "Mismatched sch_htb.c and pkt_sch.h"
  61#endif
  62
  63/* used internaly to keep status of single class */
  64enum htb_cmode {
  65        HTB_CANT_SEND,          /* class can't send and can't borrow */
  66        HTB_MAY_BORROW,         /* class can't send but may borrow */
  67        HTB_CAN_SEND            /* class can send */
  68};
  69
  70/* interior & leaf nodes; props specific to leaves are marked L: */
  71struct htb_class {
  72        /* general class parameters */
  73        u32 classid;
  74        struct gnet_stats_basic bstats;
  75        struct gnet_stats_queue qstats;
  76        struct gnet_stats_rate_est rate_est;
  77        struct tc_htb_xstats xstats;    /* our special stats */
  78        int refcnt;             /* usage count of this class */
  79
  80        /* topology */
  81        int level;              /* our level (see above) */
  82        struct htb_class *parent;       /* parent class */
  83        struct hlist_node hlist;        /* classid hash list item */
  84        struct list_head sibling;       /* sibling list item */
  85        struct list_head children;      /* children list */
  86
  87        union {
  88                struct htb_class_leaf {
  89                        struct Qdisc *q;
  90                        int prio;
  91                        int aprio;
  92                        int quantum;
  93                        int deficit[TC_HTB_MAXDEPTH];
  94                        struct list_head drop_list;
  95                } leaf;
  96                struct htb_class_inner {
  97                        struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
  98                        struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
  99                        /* When class changes from state 1->2 and disconnects from
 100                           parent's feed then we lost ptr value and start from the
 101                           first child again. Here we store classid of the
 102                           last valid ptr (used when ptr is NULL). */
 103                        u32 last_ptr_id[TC_HTB_NUMPRIO];
 104                } inner;
 105        } un;
 106        struct rb_node node[TC_HTB_NUMPRIO];    /* node for self or feed tree */
 107        struct rb_node pq_node; /* node for event queue */
 108        psched_time_t pq_key;
 109
 110        int prio_activity;      /* for which prios are we active */
 111        enum htb_cmode cmode;   /* current mode of the class */
 112
 113        /* class attached filters */
 114        struct tcf_proto *filter_list;
 115        int filter_cnt;
 116
 117        int warned;             /* only one warning about non work conserving .. */
 118
 119        /* token bucket parameters */
 120        struct qdisc_rate_table *rate;  /* rate table of the class itself */
 121        struct qdisc_rate_table *ceil;  /* ceiling rate (limits borrows too) */
 122        long buffer, cbuffer;   /* token bucket depth/rate */
 123        psched_tdiff_t mbuffer; /* max wait time */
 124        long tokens, ctokens;   /* current number of tokens */
 125        psched_time_t t_c;      /* checkpoint time */
 126
 127        int prio;               /* For parent to leaf return possible here */
 128        int quantum;            /* we do backup. Finally full replacement  */
 129                                /* of un.leaf originals should be done. */
 130};
 131
 132static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
 133                           int size)
 134{
 135        long result = qdisc_l2t(rate, size);
 136        return result;
 137}
 138
 139struct htb_sched {
 140        struct list_head root;  /* root classes list */
 141        struct hlist_head hash[HTB_HSIZE];      /* hashed by classid */
 142        struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 143
 144        /* self list - roots of self generating tree */
 145        struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 146        int row_mask[TC_HTB_MAXDEPTH];
 147        struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 148        u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
 149
 150        /* self wait list - roots of wait PQs per row */
 151        struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 152
 153        /* time of nearest event per level (row) */
 154        psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 155
 156        /* whether we hit non-work conserving class during this dequeue; we use */
 157        int nwc_hit;            /* this to disable mindelay complaint in dequeue */
 158
 159        int defcls;             /* class where unclassified flows go to */
 160
 161        /* filters for qdisc itself */
 162        struct tcf_proto *filter_list;
 163        int filter_cnt;
 164
 165        int rate2quantum;       /* quant = rate / rate2quantum */
 166        psched_time_t now;      /* cached dequeue time */
 167        struct qdisc_watchdog watchdog;
 168
 169        /* non shaped skbs; let them go directly thru */
 170        struct sk_buff_head direct_queue;
 171        int direct_qlen;        /* max qlen of above */
 172
 173        long direct_pkts;
 174};
 175
 176/* compute hash of size HTB_HSIZE for given handle */
 177static inline int htb_hash(u32 h)
 178{
 179#if HTB_HSIZE != 16
 180#error "Declare new hash for your HTB_HSIZE"
 181#endif
 182        h ^= h >> 8;            /* stolen from cbq_hash */
 183        h ^= h >> 4;
 184        return h & 0xf;
 185}
 186
 187/* find class in global hash table using given handle */
 188static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 189{
 190        struct htb_sched *q = qdisc_priv(sch);
 191        struct hlist_node *p;
 192        struct htb_class *cl;
 193
 194        if (TC_H_MAJ(handle) != sch->handle)
 195                return NULL;
 196
 197        hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) {
 198                if (cl->classid == handle)
 199                        return cl;
 200        }
 201        return NULL;
 202}
 203
 204/**
 205 * htb_classify - classify a packet into class
 206 *
 207 * It returns NULL if the packet should be dropped or -1 if the packet
 208 * should be passed directly thru. In all other cases leaf class is returned.
 209 * We allow direct class selection by classid in priority. The we examine
 210 * filters in qdisc and in inner nodes (if higher filter points to the inner
 211 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 212 * internal fifo (direct). These packets then go directly thru. If we still
 213 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
 214 * then finish and return direct queue.
 215 */
 216#define HTB_DIRECT (struct htb_class*)-1
 217
 218static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 219                                      int *qerr)
 220{
 221        struct htb_sched *q = qdisc_priv(sch);
 222        struct htb_class *cl;
 223        struct tcf_result res;
 224        struct tcf_proto *tcf;
 225        int result;
 226
 227        /* allow to select class by setting skb->priority to valid classid;
 228           note that nfmark can be used too by attaching filter fw with no
 229           rules in it */
 230        if (skb->priority == sch->handle)
 231                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 232        if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
 233                return cl;
 234
 235        *qerr = NET_XMIT_BYPASS;
 236        tcf = q->filter_list;
 237        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 238#ifdef CONFIG_NET_CLS_ACT
 239                switch (result) {
 240                case TC_ACT_QUEUED:
 241                case TC_ACT_STOLEN:
 242                        *qerr = NET_XMIT_SUCCESS;
 243                case TC_ACT_SHOT:
 244                        return NULL;
 245                }
 246#endif
 247                if ((cl = (void *)res.class) == NULL) {
 248                        if (res.classid == sch->handle)
 249                                return HTB_DIRECT;      /* X:0 (direct flow) */
 250                        if ((cl = htb_find(res.classid, sch)) == NULL)
 251                                break;  /* filter selected invalid classid */
 252                }
 253                if (!cl->level)
 254                        return cl;      /* we hit leaf; return it */
 255
 256                /* we have got inner class; apply inner filter chain */
 257                tcf = cl->filter_list;
 258        }
 259        /* classification failed; try to use default class */
 260        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 261        if (!cl || cl->level)
 262                return HTB_DIRECT;      /* bad default .. this is safe bet */
 263        return cl;
 264}
 265
 266/**
 267 * htb_add_to_id_tree - adds class to the round robin list
 268 *
 269 * Routine adds class to the list (actually tree) sorted by classid.
 270 * Make sure that class is not already on such list for given prio.
 271 */
 272static void htb_add_to_id_tree(struct rb_root *root,
 273                               struct htb_class *cl, int prio)
 274{
 275        struct rb_node **p = &root->rb_node, *parent = NULL;
 276
 277        while (*p) {
 278                struct htb_class *c;
 279                parent = *p;
 280                c = rb_entry(parent, struct htb_class, node[prio]);
 281
 282                if (cl->classid > c->classid)
 283                        p = &parent->rb_right;
 284                else
 285                        p = &parent->rb_left;
 286        }
 287        rb_link_node(&cl->node[prio], parent, p);
 288        rb_insert_color(&cl->node[prio], root);
 289}
 290
 291/**
 292 * htb_add_to_wait_tree - adds class to the event queue with delay
 293 *
 294 * The class is added to priority event queue to indicate that class will
 295 * change its mode in cl->pq_key microseconds. Make sure that class is not
 296 * already in the queue.
 297 */
 298static void htb_add_to_wait_tree(struct htb_sched *q,
 299                                 struct htb_class *cl, long delay)
 300{
 301        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 302
 303        cl->pq_key = q->now + delay;
 304        if (cl->pq_key == q->now)
 305                cl->pq_key++;
 306
 307        /* update the nearest event cache */
 308        if (q->near_ev_cache[cl->level] > cl->pq_key)
 309                q->near_ev_cache[cl->level] = cl->pq_key;
 310
 311        while (*p) {
 312                struct htb_class *c;
 313                parent = *p;
 314                c = rb_entry(parent, struct htb_class, pq_node);
 315                if (cl->pq_key >= c->pq_key)
 316                        p = &parent->rb_right;
 317                else
 318                        p = &parent->rb_left;
 319        }
 320        rb_link_node(&cl->pq_node, parent, p);
 321        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
 322}
 323
 324/**
 325 * htb_next_rb_node - finds next node in binary tree
 326 *
 327 * When we are past last key we return NULL.
 328 * Average complexity is 2 steps per call.
 329 */
 330static inline void htb_next_rb_node(struct rb_node **n)
 331{
 332        *n = rb_next(*n);
 333}
 334
 335/**
 336 * htb_add_class_to_row - add class to its row
 337 *
 338 * The class is added to row at priorities marked in mask.
 339 * It does nothing if mask == 0.
 340 */
 341static inline void htb_add_class_to_row(struct htb_sched *q,
 342                                        struct htb_class *cl, int mask)
 343{
 344        q->row_mask[cl->level] |= mask;
 345        while (mask) {
 346                int prio = ffz(~mask);
 347                mask &= ~(1 << prio);
 348                htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
 349        }
 350}
 351
 352/* If this triggers, it is a bug in this code, but it need not be fatal */
 353static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 354{
 355        if (RB_EMPTY_NODE(rb)) {
 356                WARN_ON(1);
 357        } else {
 358                rb_erase(rb, root);
 359                RB_CLEAR_NODE(rb);
 360        }
 361}
 362
 363
 364/**
 365 * htb_remove_class_from_row - removes class from its row
 366 *
 367 * The class is removed from row at priorities marked in mask.
 368 * It does nothing if mask == 0.
 369 */
 370static inline void htb_remove_class_from_row(struct htb_sched *q,
 371                                                 struct htb_class *cl, int mask)
 372{
 373        int m = 0;
 374
 375        while (mask) {
 376                int prio = ffz(~mask);
 377
 378                mask &= ~(1 << prio);
 379                if (q->ptr[cl->level][prio] == cl->node + prio)
 380                        htb_next_rb_node(q->ptr[cl->level] + prio);
 381
 382                htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
 383                if (!q->row[cl->level][prio].rb_node)
 384                        m |= 1 << prio;
 385        }
 386        q->row_mask[cl->level] &= ~m;
 387}
 388
 389/**
 390 * htb_activate_prios - creates active classe's feed chain
 391 *
 392 * The class is connected to ancestors and/or appropriate rows
 393 * for priorities it is participating on. cl->cmode must be new
 394 * (activated) mode. It does nothing if cl->prio_activity == 0.
 395 */
 396static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 397{
 398        struct htb_class *p = cl->parent;
 399        long m, mask = cl->prio_activity;
 400
 401        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 402                m = mask;
 403                while (m) {
 404                        int prio = ffz(~m);
 405                        m &= ~(1 << prio);
 406
 407                        if (p->un.inner.feed[prio].rb_node)
 408                                /* parent already has its feed in use so that
 409                                   reset bit in mask as parent is already ok */
 410                                mask &= ~(1 << prio);
 411
 412                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
 413                }
 414                p->prio_activity |= mask;
 415                cl = p;
 416                p = cl->parent;
 417
 418        }
 419        if (cl->cmode == HTB_CAN_SEND && mask)
 420                htb_add_class_to_row(q, cl, mask);
 421}
 422
 423/**
 424 * htb_deactivate_prios - remove class from feed chain
 425 *
 426 * cl->cmode must represent old mode (before deactivation). It does
 427 * nothing if cl->prio_activity == 0. Class is removed from all feed
 428 * chains and rows.
 429 */
 430static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 431{
 432        struct htb_class *p = cl->parent;
 433        long m, mask = cl->prio_activity;
 434
 435        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 436                m = mask;
 437                mask = 0;
 438                while (m) {
 439                        int prio = ffz(~m);
 440                        m &= ~(1 << prio);
 441
 442                        if (p->un.inner.ptr[prio] == cl->node + prio) {
 443                                /* we are removing child which is pointed to from
 444                                   parent feed - forget the pointer but remember
 445                                   classid */
 446                                p->un.inner.last_ptr_id[prio] = cl->classid;
 447                                p->un.inner.ptr[prio] = NULL;
 448                        }
 449
 450                        htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
 451
 452                        if (!p->un.inner.feed[prio].rb_node)
 453                                mask |= 1 << prio;
 454                }
 455
 456                p->prio_activity &= ~mask;
 457                cl = p;
 458                p = cl->parent;
 459
 460        }
 461        if (cl->cmode == HTB_CAN_SEND && mask)
 462                htb_remove_class_from_row(q, cl, mask);
 463}
 464
 465#if HTB_HYSTERESIS
 466static inline long htb_lowater(const struct htb_class *cl)
 467{
 468        return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 469}
 470static inline long htb_hiwater(const struct htb_class *cl)
 471{
 472        return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 473}
 474#else
 475#define htb_lowater(cl) (0)
 476#define htb_hiwater(cl) (0)
 477#endif
 478
 479/**
 480 * htb_class_mode - computes and returns current class mode
 481 *
 482 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 483 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 484 * from now to time when cl will change its state.
 485 * Also it is worth to note that class mode doesn't change simply
 486 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 487 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 488 * mode transitions per time unit. The speed gain is about 1/6.
 489 */
 490static inline enum htb_cmode
 491htb_class_mode(struct htb_class *cl, long *diff)
 492{
 493        long toks;
 494
 495        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 496                *diff = -toks;
 497                return HTB_CANT_SEND;
 498        }
 499
 500        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 501                return HTB_CAN_SEND;
 502
 503        *diff = -toks;
 504        return HTB_MAY_BORROW;
 505}
 506
 507/**
 508 * htb_change_class_mode - changes classe's mode
 509 *
 510 * This should be the only way how to change classe's mode under normal
 511 * cirsumstances. Routine will update feed lists linkage, change mode
 512 * and add class to the wait event queue if appropriate. New mode should
 513 * be different from old one and cl->pq_key has to be valid if changing
 514 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 515 */
 516static void
 517htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
 518{
 519        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 520
 521        if (new_mode == cl->cmode)
 522                return;
 523
 524        if (cl->prio_activity) {        /* not necessary: speed optimization */
 525                if (cl->cmode != HTB_CANT_SEND)
 526                        htb_deactivate_prios(q, cl);
 527                cl->cmode = new_mode;
 528                if (new_mode != HTB_CANT_SEND)
 529                        htb_activate_prios(q, cl);
 530        } else
 531                cl->cmode = new_mode;
 532}
 533
 534/**
 535 * htb_activate - inserts leaf cl into appropriate active feeds
 536 *
 537 * Routine learns (new) priority of leaf and activates feed chain
 538 * for the prio. It can be called on already active leaf safely.
 539 * It also adds leaf into droplist.
 540 */
 541static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 542{
 543        BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
 544
 545        if (!cl->prio_activity) {
 546                cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
 547                htb_activate_prios(q, cl);
 548                list_add_tail(&cl->un.leaf.drop_list,
 549                              q->drops + cl->un.leaf.aprio);
 550        }
 551}
 552
 553/**
 554 * htb_deactivate - remove leaf cl from active feeds
 555 *
 556 * Make sure that leaf is active. In the other words it can't be called
 557 * with non-active leaf. It also removes class from the drop list.
 558 */
 559static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 560{
 561        BUG_TRAP(cl->prio_activity);
 562
 563        htb_deactivate_prios(q, cl);
 564        cl->prio_activity = 0;
 565        list_del_init(&cl->un.leaf.drop_list);
 566}
 567
 568static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 569{
 570        int ret;
 571        struct htb_sched *q = qdisc_priv(sch);
 572        struct htb_class *cl = htb_classify(skb, sch, &ret);
 573
 574        if (cl == HTB_DIRECT) {
 575                /* enqueue to helper queue */
 576                if (q->direct_queue.qlen < q->direct_qlen) {
 577                        __skb_queue_tail(&q->direct_queue, skb);
 578                        q->direct_pkts++;
 579                } else {
 580                        kfree_skb(skb);
 581                        sch->qstats.drops++;
 582                        return NET_XMIT_DROP;
 583                }
 584#ifdef CONFIG_NET_CLS_ACT
 585        } else if (!cl) {
 586                if (ret == NET_XMIT_BYPASS)
 587                        sch->qstats.drops++;
 588                kfree_skb(skb);
 589                return ret;
 590#endif
 591        } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) !=
 592                   NET_XMIT_SUCCESS) {
 593                sch->qstats.drops++;
 594                cl->qstats.drops++;
 595                return NET_XMIT_DROP;
 596        } else {
 597                cl->bstats.packets +=
 598                        skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 599                cl->bstats.bytes += skb->len;
 600                htb_activate(q, cl);
 601        }
 602
 603        sch->q.qlen++;
 604        sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
 605        sch->bstats.bytes += skb->len;
 606        return NET_XMIT_SUCCESS;
 607}
 608
 609/* TODO: requeuing packet charges it to policers again !! */
 610static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 611{
 612        int ret;
 613        struct htb_sched *q = qdisc_priv(sch);
 614        struct htb_class *cl = htb_classify(skb, sch, &ret);
 615        struct sk_buff *tskb;
 616
 617        if (cl == HTB_DIRECT) {
 618                /* enqueue to helper queue */
 619                if (q->direct_queue.qlen < q->direct_qlen) {
 620                        __skb_queue_head(&q->direct_queue, skb);
 621                } else {
 622                        __skb_queue_head(&q->direct_queue, skb);
 623                        tskb = __skb_dequeue_tail(&q->direct_queue);
 624                        kfree_skb(tskb);
 625                        sch->qstats.drops++;
 626                        return NET_XMIT_CN;
 627                }
 628#ifdef CONFIG_NET_CLS_ACT
 629        } else if (!cl) {
 630                if (ret == NET_XMIT_BYPASS)
 631                        sch->qstats.drops++;
 632                kfree_skb(skb);
 633                return ret;
 634#endif
 635        } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) !=
 636                   NET_XMIT_SUCCESS) {
 637                sch->qstats.drops++;
 638                cl->qstats.drops++;
 639                return NET_XMIT_DROP;
 640        } else
 641                htb_activate(q, cl);
 642
 643        sch->q.qlen++;
 644        sch->qstats.requeues++;
 645        return NET_XMIT_SUCCESS;
 646}
 647
 648/**
 649 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 650 *
 651 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 652 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 653 * leaf and all ancestors and to rate bucket for ancestors at levels
 654 * "level" and higher. It also handles possible change of mode resulting
 655 * from the update. Note that mode can also increase here (MAY_BORROW to
 656 * CAN_SEND) because we can use more precise clock that event queue here.
 657 * In such case we remove class from event queue first.
 658 */
 659static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 660                             int level, struct sk_buff *skb)
 661{
 662        int bytes = skb->len;
 663        long toks, diff;
 664        enum htb_cmode old_mode;
 665
 666#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
 667        if (toks > cl->B) toks = cl->B; \
 668        toks -= L2T(cl, cl->R, bytes); \
 669        if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
 670        cl->T = toks
 671
 672        while (cl) {
 673                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 674                if (cl->level >= level) {
 675                        if (cl->level == level)
 676                                cl->xstats.lends++;
 677                        HTB_ACCNT(tokens, buffer, rate);
 678                } else {
 679                        cl->xstats.borrows++;
 680                        cl->tokens += diff;     /* we moved t_c; update tokens */
 681                }
 682                HTB_ACCNT(ctokens, cbuffer, ceil);
 683                cl->t_c = q->now;
 684
 685                old_mode = cl->cmode;
 686                diff = 0;
 687                htb_change_class_mode(q, cl, &diff);
 688                if (old_mode != cl->cmode) {
 689                        if (old_mode != HTB_CAN_SEND)
 690                                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
 691                        if (cl->cmode != HTB_CAN_SEND)
 692                                htb_add_to_wait_tree(q, cl, diff);
 693                }
 694
 695                /* update byte stats except for leaves which are already updated */
 696                if (cl->level) {
 697                        cl->bstats.bytes += bytes;
 698                        cl->bstats.packets += skb_is_gso(skb)?
 699                                        skb_shinfo(skb)->gso_segs:1;
 700                }
 701                cl = cl->parent;
 702        }
 703}
 704
 705/**
 706 * htb_do_events - make mode changes to classes at the level
 707 *
 708 * Scans event queue for pending events and applies them. Returns time of
 709 * next pending event (0 for no event in pq).
 710 * Note: Applied are events whose have cl->pq_key <= q->now.
 711 */
 712static psched_time_t htb_do_events(struct htb_sched *q, int level)
 713{
 714        /* don't run for longer than 2 jiffies; 2 is used instead of
 715           1 to simplify things when jiffy is going to be incremented
 716           too soon */
 717        unsigned long stop_at = jiffies + 2;
 718        while (time_before(jiffies, stop_at)) {
 719                struct htb_class *cl;
 720                long diff;
 721                struct rb_node *p = rb_first(&q->wait_pq[level]);
 722
 723                if (!p)
 724                        return 0;
 725
 726                cl = rb_entry(p, struct htb_class, pq_node);
 727                if (cl->pq_key > q->now)
 728                        return cl->pq_key;
 729
 730                htb_safe_rb_erase(p, q->wait_pq + level);
 731                diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 732                htb_change_class_mode(q, cl, &diff);
 733                if (cl->cmode != HTB_CAN_SEND)
 734                        htb_add_to_wait_tree(q, cl, diff);
 735        }
 736        /* too much load - let's continue on next jiffie */
 737        return q->now + PSCHED_TICKS_PER_SEC / HZ;
 738}
 739
 740/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
 741   is no such one exists. */
 742static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 743                                              u32 id)
 744{
 745        struct rb_node *r = NULL;
 746        while (n) {
 747                struct htb_class *cl =
 748                    rb_entry(n, struct htb_class, node[prio]);
 749                if (id == cl->classid)
 750                        return n;
 751
 752                if (id > cl->classid) {
 753                        n = n->rb_right;
 754                } else {
 755                        r = n;
 756                        n = n->rb_left;
 757                }
 758        }
 759        return r;
 760}
 761
 762/**
 763 * htb_lookup_leaf - returns next leaf class in DRR order
 764 *
 765 * Find leaf where current feed pointers points to.
 766 */
 767static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 768                                         struct rb_node **pptr, u32 * pid)
 769{
 770        int i;
 771        struct {
 772                struct rb_node *root;
 773                struct rb_node **pptr;
 774                u32 *pid;
 775        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 776
 777        BUG_TRAP(tree->rb_node);
 778        sp->root = tree->rb_node;
 779        sp->pptr = pptr;
 780        sp->pid = pid;
 781
 782        for (i = 0; i < 65535; i++) {
 783                if (!*sp->pptr && *sp->pid) {
 784                        /* ptr was invalidated but id is valid - try to recover
 785                           the original or next ptr */
 786                        *sp->pptr =
 787                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 788                }
 789                *sp->pid = 0;   /* ptr is valid now so that remove this hint as it
 790                                   can become out of date quickly */
 791                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 792                        *sp->pptr = sp->root;
 793                        while ((*sp->pptr)->rb_left)
 794                                *sp->pptr = (*sp->pptr)->rb_left;
 795                        if (sp > stk) {
 796                                sp--;
 797                                BUG_TRAP(*sp->pptr);
 798                                if (!*sp->pptr)
 799                                        return NULL;
 800                                htb_next_rb_node(sp->pptr);
 801                        }
 802                } else {
 803                        struct htb_class *cl;
 804                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 805                        if (!cl->level)
 806                                return cl;
 807                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
 808                        sp->pptr = cl->un.inner.ptr + prio;
 809                        sp->pid = cl->un.inner.last_ptr_id + prio;
 810                }
 811        }
 812        BUG_TRAP(0);
 813        return NULL;
 814}
 815
 816/* dequeues packet at given priority and level; call only if
 817   you are sure that there is active class at prio/level */
 818static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 819                                        int level)
 820{
 821        struct sk_buff *skb = NULL;
 822        struct htb_class *cl, *start;
 823        /* look initial class up in the row */
 824        start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
 825                                     q->ptr[level] + prio,
 826                                     q->last_ptr_id[level] + prio);
 827
 828        do {
 829next:
 830                BUG_TRAP(cl);
 831                if (!cl)
 832                        return NULL;
 833
 834                /* class can be empty - it is unlikely but can be true if leaf
 835                   qdisc drops packets in enqueue routine or if someone used
 836                   graft operation on the leaf since last dequeue;
 837                   simply deactivate and skip such class */
 838                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
 839                        struct htb_class *next;
 840                        htb_deactivate(q, cl);
 841
 842                        /* row/level might become empty */
 843                        if ((q->row_mask[level] & (1 << prio)) == 0)
 844                                return NULL;
 845
 846                        next = htb_lookup_leaf(q->row[level] + prio,
 847                                               prio, q->ptr[level] + prio,
 848                                               q->last_ptr_id[level] + prio);
 849
 850                        if (cl == start)        /* fix start if we just deleted it */
 851                                start = next;
 852                        cl = next;
 853                        goto next;
 854                }
 855
 856                skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
 857                if (likely(skb != NULL))
 858                        break;
 859                if (!cl->warned) {
 860                        printk(KERN_WARNING
 861                               "htb: class %X isn't work conserving ?!\n",
 862                               cl->classid);
 863                        cl->warned = 1;
 864                }
 865                q->nwc_hit++;
 866                htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 867                                  ptr[0]) + prio);
 868                cl = htb_lookup_leaf(q->row[level] + prio, prio,
 869                                     q->ptr[level] + prio,
 870                                     q->last_ptr_id[level] + prio);
 871
 872        } while (cl != start);
 873
 874        if (likely(skb != NULL)) {
 875                if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
 876                        cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
 877                        htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 878                                          ptr[0]) + prio);
 879                }
 880                /* this used to be after charge_class but this constelation
 881                   gives us slightly better performance */
 882                if (!cl->un.leaf.q->q.qlen)
 883                        htb_deactivate(q, cl);
 884                htb_charge_class(q, cl, level, skb);
 885        }
 886        return skb;
 887}
 888
 889static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 890{
 891        struct sk_buff *skb = NULL;
 892        struct htb_sched *q = qdisc_priv(sch);
 893        int level;
 894        psched_time_t next_event;
 895
 896        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 897        skb = __skb_dequeue(&q->direct_queue);
 898        if (skb != NULL) {
 899                sch->flags &= ~TCQ_F_THROTTLED;
 900                sch->q.qlen--;
 901                return skb;
 902        }
 903
 904        if (!sch->q.qlen)
 905                goto fin;
 906        q->now = psched_get_time();
 907
 908        next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 909        q->nwc_hit = 0;
 910        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 911                /* common case optimization - skip event handler quickly */
 912                int m;
 913                psched_time_t event;
 914
 915                if (q->now >= q->near_ev_cache[level]) {
 916                        event = htb_do_events(q, level);
 917                        if (!event)
 918                                event = q->now + PSCHED_TICKS_PER_SEC;
 919                        q->near_ev_cache[level] = event;
 920                } else
 921                        event = q->near_ev_cache[level];
 922
 923                if (event && next_event > event)
 924                        next_event = event;
 925
 926                m = ~q->row_mask[level];
 927                while (m != (int)(-1)) {
 928                        int prio = ffz(m);
 929                        m |= 1 << prio;
 930                        skb = htb_dequeue_tree(q, prio, level);
 931                        if (likely(skb != NULL)) {
 932                                sch->q.qlen--;
 933                                sch->flags &= ~TCQ_F_THROTTLED;
 934                                goto fin;
 935                        }
 936                }
 937        }
 938        sch->qstats.overlimits++;
 939        qdisc_watchdog_schedule(&q->watchdog, next_event);
 940fin:
 941        return skb;
 942}
 943
 944/* try to drop from each class (by prio) until one succeed */
 945static unsigned int htb_drop(struct Qdisc *sch)
 946{
 947        struct htb_sched *q = qdisc_priv(sch);
 948        int prio;
 949
 950        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
 951                struct list_head *p;
 952                list_for_each(p, q->drops + prio) {
 953                        struct htb_class *cl = list_entry(p, struct htb_class,
 954                                                          un.leaf.drop_list);
 955                        unsigned int len;
 956                        if (cl->un.leaf.q->ops->drop &&
 957                            (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
 958                                sch->q.qlen--;
 959                                if (!cl->un.leaf.q->q.qlen)
 960                                        htb_deactivate(q, cl);
 961                                return len;
 962                        }
 963                }
 964        }
 965        return 0;
 966}
 967
 968/* reset all classes */
 969/* always caled under BH & queue lock */
 970static void htb_reset(struct Qdisc *sch)
 971{
 972        struct htb_sched *q = qdisc_priv(sch);
 973        int i;
 974
 975        for (i = 0; i < HTB_HSIZE; i++) {
 976                struct hlist_node *p;
 977                struct htb_class *cl;
 978
 979                hlist_for_each_entry(cl, p, q->hash + i, hlist) {
 980                        if (cl->level)
 981                                memset(&cl->un.inner, 0, sizeof(cl->un.inner));
 982                        else {
 983                                if (cl->un.leaf.q)
 984                                        qdisc_reset(cl->un.leaf.q);
 985                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 986                        }
 987                        cl->prio_activity = 0;
 988                        cl->cmode = HTB_CAN_SEND;
 989
 990                }
 991        }
 992        qdisc_watchdog_cancel(&q->watchdog);
 993        __skb_queue_purge(&q->direct_queue);
 994        sch->q.qlen = 0;
 995        memset(q->row, 0, sizeof(q->row));
 996        memset(q->row_mask, 0, sizeof(q->row_mask));
 997        memset(q->wait_pq, 0, sizeof(q->wait_pq));
 998        memset(q->ptr, 0, sizeof(q->ptr));
 999        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1000                INIT_LIST_HEAD(q->drops + i);
1001}
1002
1003static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1004        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1005        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1006        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1007        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1008};
1009
1010static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1011{
1012        struct htb_sched *q = qdisc_priv(sch);
1013        struct nlattr *tb[TCA_HTB_INIT + 1];
1014        struct tc_htb_glob *gopt;
1015        int err;
1016        int i;
1017
1018        if (!opt)
1019                return -EINVAL;
1020
1021        err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
1022        if (err < 0)
1023                return err;
1024
1025        if (tb[TCA_HTB_INIT] == NULL) {
1026                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
1027                return -EINVAL;
1028        }
1029        gopt = nla_data(tb[TCA_HTB_INIT]);
1030        if (gopt->version != HTB_VER >> 16) {
1031                printk(KERN_ERR
1032                       "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1033                       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1034                return -EINVAL;
1035        }
1036
1037        INIT_LIST_HEAD(&q->root);
1038        for (i = 0; i < HTB_HSIZE; i++)
1039                INIT_HLIST_HEAD(q->hash + i);
1040        for (i = 0; i < TC_HTB_NUMPRIO; i++)
1041                INIT_LIST_HEAD(q->drops + i);
1042
1043        qdisc_watchdog_init(&q->watchdog, sch);
1044        skb_queue_head_init(&q->direct_queue);
1045
1046        q->direct_qlen = sch->dev->tx_queue_len;
1047        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1048                q->direct_qlen = 2;
1049
1050        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1051                q->rate2quantum = 1;
1052        q->defcls = gopt->defcls;
1053
1054        return 0;
1055}
1056
1057static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1058{
1059        struct htb_sched *q = qdisc_priv(sch);
1060        struct nlattr *nest;
1061        struct tc_htb_glob gopt;
1062
1063        spin_lock_bh(&sch->dev->queue_lock);
1064
1065        gopt.direct_pkts = q->direct_pkts;
1066        gopt.version = HTB_VER;
1067        gopt.rate2quantum = q->rate2quantum;
1068        gopt.defcls = q->defcls;
1069        gopt.debug = 0;
1070
1071        nest = nla_nest_start(skb, TCA_OPTIONS);
1072        if (nest == NULL)
1073                goto nla_put_failure;
1074        NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
1075        nla_nest_end(skb, nest);
1076
1077        spin_unlock_bh(&sch->dev->queue_lock);
1078        return skb->len;
1079
1080nla_put_failure:
1081        spin_unlock_bh(&sch->dev->queue_lock);
1082        nla_nest_cancel(skb, nest);
1083        return -1;
1084}
1085
1086static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1087                          struct sk_buff *skb, struct tcmsg *tcm)
1088{
1089        struct htb_class *cl = (struct htb_class *)arg;
1090        struct nlattr *nest;
1091        struct tc_htb_opt opt;
1092
1093        spin_lock_bh(&sch->dev->queue_lock);
1094        tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
1095        tcm->tcm_handle = cl->classid;
1096        if (!cl->level && cl->un.leaf.q)
1097                tcm->tcm_info = cl->un.leaf.q->handle;
1098
1099        nest = nla_nest_start(skb, TCA_OPTIONS);
1100        if (nest == NULL)
1101                goto nla_put_failure;
1102
1103        memset(&opt, 0, sizeof(opt));
1104
1105        opt.rate = cl->rate->rate;
1106        opt.buffer = cl->buffer;
1107        opt.ceil = cl->ceil->rate;
1108        opt.cbuffer = cl->cbuffer;
1109        opt.quantum = cl->un.leaf.quantum;
1110        opt.prio = cl->un.leaf.prio;
1111        opt.level = cl->level;
1112        NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1113
1114        nla_nest_end(skb, nest);
1115        spin_unlock_bh(&sch->dev->queue_lock);
1116        return skb->len;
1117
1118nla_put_failure:
1119        spin_unlock_bh(&sch->dev->queue_lock);
1120        nla_nest_cancel(skb, nest);
1121        return -1;
1122}
1123
1124static int
1125htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1126{
1127        struct htb_class *cl = (struct htb_class *)arg;
1128
1129        if (!cl->level && cl->un.leaf.q)
1130                cl->qstats.qlen = cl->un.leaf.q->q.qlen;
1131        cl->xstats.tokens = cl->tokens;
1132        cl->xstats.ctokens = cl->ctokens;
1133
1134        if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1135            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1136            gnet_stats_copy_queue(d, &cl->qstats) < 0)
1137                return -1;
1138
1139        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1140}
1141
1142static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1143                     struct Qdisc **old)
1144{
1145        struct htb_class *cl = (struct htb_class *)arg;
1146
1147        if (cl && !cl->level) {
1148                if (new == NULL &&
1149                    (new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1150                                             cl->classid))
1151                    == NULL)
1152                        return -ENOBUFS;
1153                sch_tree_lock(sch);
1154                if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
1155                        qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1156                        qdisc_reset(*old);
1157                }
1158                sch_tree_unlock(sch);
1159                return 0;
1160        }
1161        return -ENOENT;
1162}
1163
1164static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1165{
1166        struct htb_class *cl = (struct htb_class *)arg;
1167        return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1168}
1169
1170static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1171{
1172        struct htb_class *cl = (struct htb_class *)arg;
1173
1174        if (cl->un.leaf.q->q.qlen == 0)
1175                htb_deactivate(qdisc_priv(sch), cl);
1176}
1177
1178static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1179{
1180        struct htb_class *cl = htb_find(classid, sch);
1181        if (cl)
1182                cl->refcnt++;
1183        return (unsigned long)cl;
1184}
1185
1186static inline int htb_parent_last_child(struct htb_class *cl)
1187{
1188        if (!cl->parent)
1189                /* the root class */
1190                return 0;
1191
1192        if (!(cl->parent->children.next == &cl->sibling &&
1193                cl->parent->children.prev == &cl->sibling))
1194                /* not the last child */
1195                return 0;
1196
1197        return 1;
1198}
1199
1200static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1201                               struct Qdisc *new_q)
1202{
1203        struct htb_class *parent = cl->parent;
1204
1205        BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
1206
1207        if (parent->cmode != HTB_CAN_SEND)
1208                htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
1209
1210        parent->level = 0;
1211        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1212        INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1213        parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1214        parent->un.leaf.quantum = parent->quantum;
1215        parent->un.leaf.prio = parent->prio;
1216        parent->tokens = parent->buffer;
1217        parent->ctokens = parent->cbuffer;
1218        parent->t_c = psched_get_time();
1219        parent->cmode = HTB_CAN_SEND;
1220}
1221
1222static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1223{
1224        struct htb_sched *q = qdisc_priv(sch);
1225
1226        if (!cl->level) {
1227                BUG_TRAP(cl->un.leaf.q);
1228                qdisc_destroy(cl->un.leaf.q);
1229        }
1230        gen_kill_estimator(&cl->bstats, &cl->rate_est);
1231        qdisc_put_rtab(cl->rate);
1232        qdisc_put_rtab(cl->ceil);
1233
1234        tcf_destroy_chain(cl->filter_list);
1235
1236        while (!list_empty(&cl->children))
1237                htb_destroy_class(sch, list_entry(cl->children.next,
1238                                                  struct htb_class, sibling));
1239
1240        /* note: this delete may happen twice (see htb_delete) */
1241        hlist_del_init(&cl->hlist);
1242        list_del(&cl->sibling);
1243
1244        if (cl->prio_activity)
1245                htb_deactivate(q, cl);
1246
1247        if (cl->cmode != HTB_CAN_SEND)
1248                htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
1249
1250        kfree(cl);
1251}
1252
1253/* always caled under BH & queue lock */
1254static void htb_destroy(struct Qdisc *sch)
1255{
1256        struct htb_sched *q = qdisc_priv(sch);
1257
1258        qdisc_watchdog_cancel(&q->watchdog);
1259        /* This line used to be after htb_destroy_class call below
1260           and surprisingly it worked in 2.4. But it must precede it
1261           because filter need its target class alive to be able to call
1262           unbind_filter on it (without Oops). */
1263        tcf_destroy_chain(q->filter_list);
1264
1265        while (!list_empty(&q->root))
1266                htb_destroy_class(sch, list_entry(q->root.next,
1267                                                  struct htb_class, sibling));
1268
1269        __skb_queue_purge(&q->direct_queue);
1270}
1271
1272static int htb_delete(struct Qdisc *sch, unsigned long arg)
1273{
1274        struct htb_sched *q = qdisc_priv(sch);
1275        struct htb_class *cl = (struct htb_class *)arg;
1276        unsigned int qlen;
1277        struct Qdisc *new_q = NULL;
1278        int last_child = 0;
1279
1280        // TODO: why don't allow to delete subtree ? references ? does
1281        // tc subsys quarantee us that in htb_destroy it holds no class
1282        // refs so that we can remove children safely there ?
1283        if (!list_empty(&cl->children) || cl->filter_cnt)
1284                return -EBUSY;
1285
1286        if (!cl->level && htb_parent_last_child(cl)) {
1287                new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1288                                                cl->parent->classid);
1289                last_child = 1;
1290        }
1291
1292        sch_tree_lock(sch);
1293
1294        if (!cl->level) {
1295                qlen = cl->un.leaf.q->q.qlen;
1296                qdisc_reset(cl->un.leaf.q);
1297                qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
1298        }
1299
1300        /* delete from hash and active; remainder in destroy_class */
1301        hlist_del_init(&cl->hlist);
1302
1303        if (cl->prio_activity)
1304                htb_deactivate(q, cl);
1305
1306        if (last_child)
1307                htb_parent_to_leaf(q, cl, new_q);
1308
1309        if (--cl->refcnt == 0)
1310                htb_destroy_class(sch, cl);
1311
1312        sch_tree_unlock(sch);
1313        return 0;
1314}
1315
1316static void htb_put(struct Qdisc *sch, unsigned long arg)
1317{
1318        struct htb_class *cl = (struct htb_class *)arg;
1319
1320        if (--cl->refcnt == 0)
1321                htb_destroy_class(sch, cl);
1322}
1323
1324static int htb_change_class(struct Qdisc *sch, u32 classid,
1325                            u32 parentid, struct nlattr **tca,
1326                            unsigned long *arg)
1327{
1328        int err = -EINVAL;
1329        struct htb_sched *q = qdisc_priv(sch);
1330        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1331        struct nlattr *opt = tca[TCA_OPTIONS];
1332        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1333        struct nlattr *tb[TCA_HTB_RTAB + 1];
1334        struct tc_htb_opt *hopt;
1335
1336        /* extract all subattrs from opt attr */
1337        if (!opt)
1338                goto failure;
1339
1340        err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
1341        if (err < 0)
1342                goto failure;
1343
1344        err = -EINVAL;
1345        if (tb[TCA_HTB_PARMS] == NULL)
1346                goto failure;
1347
1348        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1349
1350        hopt = nla_data(tb[TCA_HTB_PARMS]);
1351
1352        rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1353        ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1354        if (!rtab || !ctab)
1355                goto failure;
1356
1357        if (!cl) {              /* new class */
1358                struct Qdisc *new_q;
1359                int prio;
1360                struct {
1361                        struct nlattr           nla;
1362                        struct gnet_estimator   opt;
1363                } est = {
1364                        .nla = {
1365                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1366                                .nla_type       = TCA_RATE,
1367                        },
1368                        .opt = {
1369                                /* 4s interval, 16s averaging constant */
1370                                .interval       = 2,
1371                                .ewma_log       = 2,
1372                        },
1373                };
1374
1375                /* check for valid classid */
1376                if (!classid || TC_H_MAJ(classid ^ sch->handle)
1377                    || htb_find(classid, sch))
1378                        goto failure;
1379
1380                /* check maximal depth */
1381                if (parent && parent->parent && parent->parent->level < 2) {
1382                        printk(KERN_ERR "htb: tree is too deep\n");
1383                        goto failure;
1384                }
1385                err = -ENOBUFS;
1386                if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1387                        goto failure;
1388
1389                gen_new_estimator(&cl->bstats, &cl->rate_est,
1390                                  &sch->dev->queue_lock,
1391                                  tca[TCA_RATE] ? : &est.nla);
1392                cl->refcnt = 1;
1393                INIT_LIST_HEAD(&cl->sibling);
1394                INIT_HLIST_NODE(&cl->hlist);
1395                INIT_LIST_HEAD(&cl->children);
1396                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1397                RB_CLEAR_NODE(&cl->pq_node);
1398
1399                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1400                        RB_CLEAR_NODE(&cl->node[prio]);
1401
1402                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1403                   so that can't be used inside of sch_tree_lock
1404                   -- thanks to Karlis Peisenieks */
1405                new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid);
1406                sch_tree_lock(sch);
1407                if (parent && !parent->level) {
1408                        unsigned int qlen = parent->un.leaf.q->q.qlen;
1409
1410                        /* turn parent into inner node */
1411                        qdisc_reset(parent->un.leaf.q);
1412                        qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
1413                        qdisc_destroy(parent->un.leaf.q);
1414                        if (parent->prio_activity)
1415                                htb_deactivate(q, parent);
1416
1417                        /* remove from evt list because of level change */
1418                        if (parent->cmode != HTB_CAN_SEND) {
1419                                htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
1420                                parent->cmode = HTB_CAN_SEND;
1421                        }
1422                        parent->level = (parent->parent ? parent->parent->level
1423                                         : TC_HTB_MAXDEPTH) - 1;
1424                        memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1425                }
1426                /* leaf (we) needs elementary qdisc */
1427                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1428
1429                cl->classid = classid;
1430                cl->parent = parent;
1431
1432                /* set class to be in HTB_CAN_SEND state */
1433                cl->tokens = hopt->buffer;
1434                cl->ctokens = hopt->cbuffer;
1435                cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;        /* 1min */
1436                cl->t_c = psched_get_time();
1437                cl->cmode = HTB_CAN_SEND;
1438
1439                /* attach to the hash list and parent's family */
1440                hlist_add_head(&cl->hlist, q->hash + htb_hash(classid));
1441                list_add_tail(&cl->sibling,
1442                              parent ? &parent->children : &q->root);
1443        } else {
1444                if (tca[TCA_RATE])
1445                        gen_replace_estimator(&cl->bstats, &cl->rate_est,
1446                                              &sch->dev->queue_lock,
1447                                              tca[TCA_RATE]);
1448                sch_tree_lock(sch);
1449        }
1450
1451        /* it used to be a nasty bug here, we have to check that node
1452           is really leaf before changing cl->un.leaf ! */
1453        if (!cl->level) {
1454                cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
1455                if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
1456                        printk(KERN_WARNING
1457                               "HTB: quantum of class %X is small. Consider r2q change.\n",
1458                               cl->classid);
1459                        cl->un.leaf.quantum = 1000;
1460                }
1461                if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
1462                        printk(KERN_WARNING
1463                               "HTB: quantum of class %X is big. Consider r2q change.\n",
1464                               cl->classid);
1465                        cl->un.leaf.quantum = 200000;
1466                }
1467                if (hopt->quantum)
1468                        cl->un.leaf.quantum = hopt->quantum;
1469                if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
1470                        cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
1471
1472                /* backup for htb_parent_to_leaf */
1473                cl->quantum = cl->un.leaf.quantum;
1474                cl->prio = cl->un.leaf.prio;
1475        }
1476
1477        cl->buffer = hopt->buffer;
1478        cl->cbuffer = hopt->cbuffer;
1479        if (cl->rate)
1480                qdisc_put_rtab(cl->rate);
1481        cl->rate = rtab;
1482        if (cl->ceil)
1483                qdisc_put_rtab(cl->ceil);
1484        cl->ceil = ctab;
1485        sch_tree_unlock(sch);
1486
1487        *arg = (unsigned long)cl;
1488        return 0;
1489
1490failure:
1491        if (rtab)
1492                qdisc_put_rtab(rtab);
1493        if (ctab)
1494                qdisc_put_rtab(ctab);
1495        return err;
1496}
1497
1498static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1499{
1500        struct htb_sched *q = qdisc_priv(sch);
1501        struct htb_class *cl = (struct htb_class *)arg;
1502        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1503
1504        return fl;
1505}
1506
1507static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1508                                     u32 classid)
1509{
1510        struct htb_sched *q = qdisc_priv(sch);
1511        struct htb_class *cl = htb_find(classid, sch);
1512
1513        /*if (cl && !cl->level) return 0;
1514           The line above used to be there to prevent attaching filters to
1515           leaves. But at least tc_index filter uses this just to get class
1516           for other reasons so that we have to allow for it.
1517           ----
1518           19.6.2002 As Werner explained it is ok - bind filter is just
1519           another way to "lock" the class - unlike "get" this lock can
1520           be broken by class during destroy IIUC.
1521         */
1522        if (cl)
1523                cl->filter_cnt++;
1524        else
1525                q->filter_cnt++;
1526        return (unsigned long)cl;
1527}
1528
1529static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1530{
1531        struct htb_sched *q = qdisc_priv(sch);
1532        struct htb_class *cl = (struct htb_class *)arg;
1533
1534        if (cl)
1535                cl->filter_cnt--;
1536        else
1537                q->filter_cnt--;
1538}
1539
1540static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1541{
1542        struct htb_sched *q = qdisc_priv(sch);
1543        int i;
1544
1545        if (arg->stop)
1546                return;
1547
1548        for (i = 0; i < HTB_HSIZE; i++) {
1549                struct hlist_node *p;
1550                struct htb_class *cl;
1551
1552                hlist_for_each_entry(cl, p, q->hash + i, hlist) {
1553                        if (arg->count < arg->skip) {
1554                                arg->count++;
1555                                continue;
1556                        }
1557                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1558                                arg->stop = 1;
1559                                return;
1560                        }
1561                        arg->count++;
1562                }
1563        }
1564}
1565
1566static const struct Qdisc_class_ops htb_class_ops = {
1567        .graft          =       htb_graft,
1568        .leaf           =       htb_leaf,
1569        .qlen_notify    =       htb_qlen_notify,
1570        .get            =       htb_get,
1571        .put            =       htb_put,
1572        .change         =       htb_change_class,
1573        .delete         =       htb_delete,
1574        .walk           =       htb_walk,
1575        .tcf_chain      =       htb_find_tcf,
1576        .bind_tcf       =       htb_bind_filter,
1577        .unbind_tcf     =       htb_unbind_filter,
1578        .dump           =       htb_dump_class,
1579        .dump_stats     =       htb_dump_class_stats,
1580};
1581
1582static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1583        .next           =       NULL,
1584        .cl_ops         =       &htb_class_ops,
1585        .id             =       "htb",
1586        .priv_size      =       sizeof(struct htb_sched),
1587        .enqueue        =       htb_enqueue,
1588        .dequeue        =       htb_dequeue,
1589        .requeue        =       htb_requeue,
1590        .drop           =       htb_drop,
1591        .init           =       htb_init,
1592        .reset          =       htb_reset,
1593        .destroy        =       htb_destroy,
1594        .change         =       NULL /* htb_change */,
1595        .dump           =       htb_dump,
1596        .owner          =       THIS_MODULE,
1597};
1598
1599static int __init htb_module_init(void)
1600{
1601        return register_qdisc(&htb_qdisc_ops);
1602}
1603static void __exit htb_module_exit(void)
1604{
1605        unregister_qdisc(&htb_qdisc_ops);
1606}
1607
1608module_init(htb_module_init)
1609module_exit(htb_module_exit)
1610MODULE_LICENSE("GPL");
1611
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.