linux/net/sched/sch_htb.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
   4 *
   5 * Authors:     Martin Devera, <devik@cdi.cz>
   6 *
   7 * Credits (in time order) for older HTB versions:
   8 *              Stef Coene <stef.coene@docum.org>
   9 *                      HTB support at LARTC mailing list
  10 *              Ondrej Kraus, <krauso@barr.cz>
  11 *                      found missing INIT_QDISC(htb)
  12 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  13 *                      helped a lot to locate nasty class stall bug
  14 *              Andi Kleen, Jamal Hadi, Bert Hubert
  15 *                      code review and helpful comments on shaping
  16 *              Tomasz Wrona, <tw@eter.tym.pl>
  17 *                      created test case so that I was able to fix nasty bug
  18 *              Wilfried Weissmann
  19 *                      spotted bug in dequeue code and helped with fix
  20 *              Jiri Fojtasek
  21 *                      fixed requeue routine
  22 *              and many others. thanks.
  23 */
  24#include <linux/module.h>
  25#include <linux/moduleparam.h>
  26#include <linux/types.h>
  27#include <linux/kernel.h>
  28#include <linux/string.h>
  29#include <linux/errno.h>
  30#include <linux/skbuff.h>
  31#include <linux/list.h>
  32#include <linux/compiler.h>
  33#include <linux/rbtree.h>
  34#include <linux/workqueue.h>
  35#include <linux/slab.h>
  36#include <net/netlink.h>
  37#include <net/sch_generic.h>
  38#include <net/pkt_sched.h>
  39#include <net/pkt_cls.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
  45    it allows assigning a priority to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
  49    Each class is assigned a level. A leaf ALWAYS has level 0 and root
  50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
  51    one less than their parent.
  52*/
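
    /* Illustrative example (assumed typical iproute2 usage): a small
     * hierarchy might be built as
     *
     *   tc qdisc add dev eth0 root handle 1: htb default 20
     *   tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
     *   tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit ceil 100mbit
     *   tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit ceil 100mbit
     *
     * Here 1:10 and 1:20 are leaves (level 0) that may borrow from 1:1,
     * while 1:1 is an interior root class at level TC_HTB_MAXDEPTH-1.
     */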
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  55#define HTB_VER 0x30011         /* the major number must match the version supplied by TC */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  64
  65static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
  66module_param(htb_rate_est, int, 0640);
  67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
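
    /* Example (illustrative): both knobs can be set at module load time,
     * e.g. "modprobe sch_htb htb_hysteresis=1 htb_rate_est=1", or changed
     * later by root via /sys/module/sch_htb/parameters/ thanks to the
     * 0640 permissions above.
     */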
  68
  69/* used internally to keep the status of a single class */
  70enum htb_cmode {
  71        HTB_CANT_SEND,          /* class can't send and can't borrow */
  72        HTB_MAY_BORROW,         /* class can't send but may borrow */
  73        HTB_CAN_SEND            /* class can send */
  74};
  75
  76struct htb_prio {
  77        union {
  78                struct rb_root  row;
  79                struct rb_root  feed;
  80        };
  81        struct rb_node  *ptr;
  82        /* When a class changes from state 1->2 (HTB_MAY_BORROW to
  83         * HTB_CAN_SEND) and disconnects from its parent's feed, we lose
  84         * the ptr value and start from the first child again. Here we
  85         * store the classid of the last valid ptr (used when ptr is NULL).
  86         */
  87        u32             last_ptr_id;
  88};
  89
  90/* interior & leaf nodes; props specific to leaves are marked L:
  91 * To reduce false sharing, place mostly read fields at beginning,
  92 * and mostly written ones at the end.
  93 */
  94struct htb_class {
  95        struct Qdisc_class_common common;
  96        struct psched_ratecfg   rate;
  97        struct psched_ratecfg   ceil;
  98        s64                     buffer, cbuffer;/* token bucket depth/rate */
  99        s64                     mbuffer;        /* max wait time */
 100        u32                     prio;           /* these two are used only by leaves... */
 101        int                     quantum;        /* but stored for parent-to-leaf return */
 102
 103        struct tcf_proto __rcu  *filter_list;   /* class attached filters */
 104        struct tcf_block        *block;
 105        int                     filter_cnt;
 106
 107        int                     level;          /* our level (see above) */
 108        unsigned int            children;
 109        struct htb_class        *parent;        /* parent class */
 110
 111        struct net_rate_estimator __rcu *rate_est;
 112
 113        /*
 114         * Written often fields
 115         */
 116        struct gnet_stats_basic_packed bstats;
 117        struct gnet_stats_basic_packed bstats_bias;
 118        struct tc_htb_xstats    xstats; /* our special stats */
 119
 120        /* token bucket parameters */
 121        s64                     tokens, ctokens;/* current number of tokens */
 122        s64                     t_c;            /* checkpoint time */
 123
 124        union {
 125                struct htb_class_leaf {
 126                        int             deficit[TC_HTB_MAXDEPTH];
 127                        struct Qdisc    *q;
 128                } leaf;
 129                struct htb_class_inner {
 130                        struct htb_prio clprio[TC_HTB_NUMPRIO];
 131                } inner;
 132        };
 133        s64                     pq_key;
 134
 135        int                     prio_activity;  /* for which prios are we active */
 136        enum htb_cmode          cmode;          /* current mode of the class */
 137        struct rb_node          pq_node;        /* node for event queue */
 138        struct rb_node          node[TC_HTB_NUMPRIO];   /* node for self or feed tree */
 139
 140        unsigned int drops ____cacheline_aligned_in_smp;
 141        unsigned int            overlimits;
 142};
 143
 144struct htb_level {
 145        struct rb_root  wait_pq;
 146        struct htb_prio hprio[TC_HTB_NUMPRIO];
 147};
 148
 149struct htb_sched {
 150        struct Qdisc_class_hash clhash;
 151        int                     defcls;         /* class where unclassified flows go to */
 152        int                     rate2quantum;   /* quant = rate / rate2quantum */
 153
 154        /* filters for qdisc itself */
 155        struct tcf_proto __rcu  *filter_list;
 156        struct tcf_block        *block;
 157
 158#define HTB_WARN_TOOMANYEVENTS  0x1
 159        unsigned int            warned; /* only one warning */
 160        int                     direct_qlen;
 161        struct work_struct      work;
 162
  163        /* non-shaped skbs; let them go directly thru */
 164        struct qdisc_skb_head   direct_queue;
 165        u32                     direct_pkts;
 166        u32                     overlimits;
 167
 168        struct qdisc_watchdog   watchdog;
 169
 170        s64                     now;    /* cached dequeue time */
 171
 172        /* time of nearest event per level (row) */
 173        s64                     near_ev_cache[TC_HTB_MAXDEPTH];
 174
 175        int                     row_mask[TC_HTB_MAXDEPTH];
 176
 177        struct htb_level        hlevel[TC_HTB_MAXDEPTH];
 178
 179        struct Qdisc            **direct_qdiscs;
 180        unsigned int            num_direct_qdiscs;
 181
 182        bool                    offload;
 183};
 184
 185/* find class in global hash table using given handle */
 186static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 187{
 188        struct htb_sched *q = qdisc_priv(sch);
 189        struct Qdisc_class_common *clc;
 190
 191        clc = qdisc_class_find(&q->clhash, handle);
 192        if (clc == NULL)
 193                return NULL;
 194        return container_of(clc, struct htb_class, common);
 195}
 196
 197static unsigned long htb_search(struct Qdisc *sch, u32 handle)
 198{
 199        return (unsigned long)htb_find(handle, sch);
 200}
 201/**
 202 * htb_classify - classify a packet into a class
 203 *
 204 * It returns NULL if the packet should be dropped or HTB_DIRECT (-1) if the
 205 * packet should be passed directly thru. In all other cases a leaf class is
 206 * returned. We allow direct class selection by classid in skb->priority. Then
 207 * we examine filters in the qdisc and in inner nodes (if a higher filter
 208 * points to the inner node). If we end up with classid MAJOR:0 we enqueue the
 209 * skb into the special internal fifo (direct). These packets then go directly
 210 * thru. If we still have no valid leaf we try to use the MAJOR:default leaf.
 211 * If that is still unsuccessful, we finish and return the direct queue.
 212 */
 213#define HTB_DIRECT ((struct htb_class *)-1L)
 214
 215static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 216                                      int *qerr)
 217{
 218        struct htb_sched *q = qdisc_priv(sch);
 219        struct htb_class *cl;
 220        struct tcf_result res;
 221        struct tcf_proto *tcf;
 222        int result;
 223
 224        /* allow selecting the class by setting skb->priority to a valid
 225         * classid; note that nfmark can be used too by attaching an fw
 226         * filter with no rules in it
 227         */
 228        if (skb->priority == sch->handle)
 229                return HTB_DIRECT;      /* X:0 (direct flow) selected */
 230        cl = htb_find(skb->priority, sch);
 231        if (cl) {
 232                if (cl->level == 0)
 233                        return cl;
 234                /* Start with inner filter chain if a non-leaf class is selected */
 235                tcf = rcu_dereference_bh(cl->filter_list);
 236        } else {
 237                tcf = rcu_dereference_bh(q->filter_list);
 238        }
 239
 240        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 241        while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 242#ifdef CONFIG_NET_CLS_ACT
 243                switch (result) {
 244                case TC_ACT_QUEUED:
 245                case TC_ACT_STOLEN:
 246                case TC_ACT_TRAP:
 247                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 248                        fallthrough;
 249                case TC_ACT_SHOT:
 250                        return NULL;
 251                }
 252#endif
 253                cl = (void *)res.class;
 254                if (!cl) {
 255                        if (res.classid == sch->handle)
 256                                return HTB_DIRECT;      /* X:0 (direct flow) */
 257                        cl = htb_find(res.classid, sch);
 258                        if (!cl)
 259                                break;  /* filter selected invalid classid */
 260                }
 261                if (!cl->level)
 262                        return cl;      /* we hit leaf; return it */
 263
 264                /* we have got inner class; apply inner filter chain */
 265                tcf = rcu_dereference_bh(cl->filter_list);
 266        }
 267        /* classification failed; try to use default class */
 268        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 269        if (!cl || cl->level)
 270                return HTB_DIRECT;      /* bad default; this is a safe bet */
 271        return cl;
 272}
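
    /* Worked example for the fast path above (illustrative, assuming the
     * qdisc was created with "handle 1:"): a packet whose skb->priority is
     * 0x00010000 (1:0) matches sch->handle and bypasses classification into
     * the direct queue, while 0x00010010 (1:10) selects leaf class 1:10
     * without consulting any filter.
     */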
 273
 274/**
 275 * htb_add_to_id_tree - adds class to the round robin list
 276 * @root: the root of the tree
 277 * @cl: the class to add
 278 * @prio: the given prio in the class
 279 *
 280 * Routine adds the class to the list (actually a tree) sorted by classid.
 281 * Make sure that the class is not already on such a list for the given prio.
 282 */
 283static void htb_add_to_id_tree(struct rb_root *root,
 284                               struct htb_class *cl, int prio)
 285{
 286        struct rb_node **p = &root->rb_node, *parent = NULL;
 287
 288        while (*p) {
 289                struct htb_class *c;
 290                parent = *p;
 291                c = rb_entry(parent, struct htb_class, node[prio]);
 292
 293                if (cl->common.classid > c->common.classid)
 294                        p = &parent->rb_right;
 295                else
 296                        p = &parent->rb_left;
 297        }
 298        rb_link_node(&cl->node[prio], parent, p);
 299        rb_insert_color(&cl->node[prio], root);
 300}
 301
 302/**
 303 * htb_add_to_wait_tree - adds class to the event queue with delay
 304 * @q: the priority event queue
 305 * @cl: the class to add
 306 * @delay: delay in nanoseconds
 307 *
 308 * The class is added to the priority event queue to indicate that the class
 309 * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
 310 * the class is not already in the queue.
 311 */
 312static void htb_add_to_wait_tree(struct htb_sched *q,
 313                                 struct htb_class *cl, s64 delay)
 314{
 315        struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
 316
 317        cl->pq_key = q->now + delay;
 318        if (cl->pq_key == q->now)
 319                cl->pq_key++;
 320
 321        /* update the nearest event cache */
 322        if (q->near_ev_cache[cl->level] > cl->pq_key)
 323                q->near_ev_cache[cl->level] = cl->pq_key;
 324
 325        while (*p) {
 326                struct htb_class *c;
 327                parent = *p;
 328                c = rb_entry(parent, struct htb_class, pq_node);
 329                if (cl->pq_key >= c->pq_key)
 330                        p = &parent->rb_right;
 331                else
 332                        p = &parent->rb_left;
 333        }
 334        rb_link_node(&cl->pq_node, parent, p);
 335        rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 336}
 337
 338/**
 339 * htb_next_rb_node - finds next node in binary tree
 340 * @n: the current node in binary tree
 341 *
 342 * When we are past the last key, *n is set to NULL.
 343 * Average complexity is 2 steps per call.
 344 */
 345static inline void htb_next_rb_node(struct rb_node **n)
 346{
 347        *n = rb_next(*n);
 348}
 349
 350/**
 351 * htb_add_class_to_row - add class to its row
 352 * @q: the priority event queue
 353 * @cl: the class to add
 354 * @mask: the given priorities in class in bitmap
 355 *
 356 * The class is added to row at priorities marked in mask.
 357 * It does nothing if mask == 0.
 358 */
 359static inline void htb_add_class_to_row(struct htb_sched *q,
 360                                        struct htb_class *cl, int mask)
 361{
 362        q->row_mask[cl->level] |= mask;
 363        while (mask) {
 364                int prio = ffz(~mask);
 365                mask &= ~(1 << prio);
 366                htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
 367        }
 368}
 369
 370/* If this triggers, it is a bug in this code, but it need not be fatal */
 371static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 372{
 373        if (RB_EMPTY_NODE(rb)) {
 374                WARN_ON(1);
 375        } else {
 376                rb_erase(rb, root);
 377                RB_CLEAR_NODE(rb);
 378        }
 379}
 380
 381
 382/**
 383 * htb_remove_class_from_row - removes class from its row
 384 * @q: the priority event queue
 385 * @cl: the class to remove
 386 * @mask: the given priorities in class in bitmap
 387 *
 388 * The class is removed from row at priorities marked in mask.
 389 * It does nothing if mask == 0.
 390 */
 391static inline void htb_remove_class_from_row(struct htb_sched *q,
 392                                                 struct htb_class *cl, int mask)
 393{
 394        int m = 0;
 395        struct htb_level *hlevel = &q->hlevel[cl->level];
 396
 397        while (mask) {
 398                int prio = ffz(~mask);
 399                struct htb_prio *hprio = &hlevel->hprio[prio];
 400
 401                mask &= ~(1 << prio);
 402                if (hprio->ptr == cl->node + prio)
 403                        htb_next_rb_node(&hprio->ptr);
 404
 405                htb_safe_rb_erase(cl->node + prio, &hprio->row);
 406                if (!hprio->row.rb_node)
 407                        m |= 1 << prio;
 408        }
 409        q->row_mask[cl->level] &= ~m;
 410}
 411
 412/**
 413 * htb_activate_prios - creates the active class's feed chain
 414 * @q: the priority event queue
 415 * @cl: the class to activate
 416 *
 417 * The class is connected to its ancestors and/or the appropriate rows
 418 * for the priorities it participates in. cl->cmode must be the new
 419 * (activated) mode. It does nothing if cl->prio_activity == 0.
 420 */
 421static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 422{
 423        struct htb_class *p = cl->parent;
 424        long m, mask = cl->prio_activity;
 425
 426        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 427                m = mask;
 428                while (m) {
 429                        int prio = ffz(~m);
 430                        m &= ~(1 << prio);
 431
 432                        if (p->inner.clprio[prio].feed.rb_node)
 433                                /* parent already has its feed in use, so
 434                                 * reset the bit in mask as parent is already ok
 435                                 */
 436                                mask &= ~(1 << prio);
 437
 438                        htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
 439                }
 440                p->prio_activity |= mask;
 441                cl = p;
 442                p = cl->parent;
 443
 444        }
 445        if (cl->cmode == HTB_CAN_SEND && mask)
 446                htb_add_class_to_row(q, cl, mask);
 447}
 448
 449/**
 450 * htb_deactivate_prios - removes the class from the feed chain
 451 * @q: the priority event queue
 452 * @cl: the class to deactivate
 453 *
 454 * cl->cmode must represent the old mode (before deactivation). It does
 455 * nothing if cl->prio_activity == 0. The class is removed from all feed
 456 * chains and rows.
 457 */
 458static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 459{
 460        struct htb_class *p = cl->parent;
 461        long m, mask = cl->prio_activity;
 462
 463        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 464                m = mask;
 465                mask = 0;
 466                while (m) {
 467                        int prio = ffz(~m);
 468                        m &= ~(1 << prio);
 469
 470                        if (p->inner.clprio[prio].ptr == cl->node + prio) {
 471                                /* we are removing child which is pointed to from
 472                                 * parent feed - forget the pointer but remember
 473                                 * classid
 474                                 */
 475                                p->inner.clprio[prio].last_ptr_id = cl->common.classid;
 476                                p->inner.clprio[prio].ptr = NULL;
 477                        }
 478
 479                        htb_safe_rb_erase(cl->node + prio,
 480                                          &p->inner.clprio[prio].feed);
 481
 482                        if (!p->inner.clprio[prio].feed.rb_node)
 483                                mask |= 1 << prio;
 484                }
 485
 486                p->prio_activity &= ~mask;
 487                cl = p;
 488                p = cl->parent;
 489
 490        }
 491        if (cl->cmode == HTB_CAN_SEND && mask)
 492                htb_remove_class_from_row(q, cl, mask);
 493}
 494
 495static inline s64 htb_lowater(const struct htb_class *cl)
 496{
 497        if (htb_hysteresis)
 498                return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 499        else
 500                return 0;
 501}
 502static inline s64 htb_hiwater(const struct htb_class *cl)
 503{
 504        if (htb_hysteresis)
 505                return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 506        else
 507                return 0;
 508}
 509
 510
 511/**
 512 * htb_class_mode - computes and returns current class mode
 513 * @cl: the target class
 514 * @diff: diff time in nanoseconds
 515 *
 516 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 517 * is not HTB_CAN_SEND then *diff is updated to the time difference
 518 * from now to the time when cl will change its state.
 519 * Also it is worth noting that the class mode doesn't change simply
 520 * at cl->{c,}tokens == 0 but rather with a hysteresis in the
 521 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 522 * mode transitions per time unit. The speed gain is about 1/6.
 523 */
 524static inline enum htb_cmode
 525htb_class_mode(struct htb_class *cl, s64 *diff)
 526{
 527        s64 toks;
 528
 529        if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 530                *diff = -toks;
 531                return HTB_CANT_SEND;
 532        }
 533
 534        if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 535                return HTB_CAN_SEND;
 536
 537        *diff = -toks;
 538        return HTB_MAY_BORROW;
 539}
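
    /* Illustrative numbers: with htb_hysteresis off, a class whose ctokens
     * would be -5000 ns after the update is put into HTB_CANT_SEND and *diff
     * becomes 5000, i.e. the nanoseconds that must pass before the ceil
     * bucket is back at zero; with hysteresis on, a class not yet in
     * HTB_CANT_SEND is only demoted once the deficit exceeds its whole
     * cbuffer.
     */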
 540
 541/**
 542 * htb_change_class_mode - changes the class's mode
 543 * @q: the priority event queue
 544 * @cl: the target class
 545 * @diff: diff time in nanoseconds
 546 *
 547 * This should be the only way to change a class's mode under normal
 548 * circumstances. The routine will update the feed list linkage, change the
 549 * mode and add the class to the wait event queue if appropriate. The new
 550 * mode should be different from the old one and cl->pq_key has to be valid
 551 * if changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 552 */
 553static void
 554htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 555{
 556        enum htb_cmode new_mode = htb_class_mode(cl, diff);
 557
 558        if (new_mode == cl->cmode)
 559                return;
 560
 561        if (new_mode == HTB_CANT_SEND) {
 562                cl->overlimits++;
 563                q->overlimits++;
 564        }
 565
 566        if (cl->prio_activity) {        /* not necessary: speed optimization */
 567                if (cl->cmode != HTB_CANT_SEND)
 568                        htb_deactivate_prios(q, cl);
 569                cl->cmode = new_mode;
 570                if (new_mode != HTB_CANT_SEND)
 571                        htb_activate_prios(q, cl);
 572        } else
 573                cl->cmode = new_mode;
 574}
 575
 576/**
 577 * htb_activate - inserts leaf cl into appropriate active feeds
 578 * @q: the priority event queue
 579 * @cl: the target class
 580 *
 581 * The routine learns the (new) priority of the leaf and activates the
 582 * feed chain for that prio. It can safely be called on an already
 583 * active leaf.
 584 */
 585static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 586{
 587        WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
 588
 589        if (!cl->prio_activity) {
 590                cl->prio_activity = 1 << cl->prio;
 591                htb_activate_prios(q, cl);
 592        }
 593}
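
    /* Example (illustrative): a leaf configured with "prio 2" that has
     * packets queued ends up with prio_activity == 0x4 (bit 2 set); the
     * bit is cleared again by htb_deactivate() once its queue drains.
     */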
 594
 595/**
 596 * htb_deactivate - remove leaf cl from active feeds
 597 * @q: the priority event queue
 598 * @cl: the target class
 599 *
 600 * Make sure that the leaf is active. In other words, it can't be
 601 * called with a non-active leaf.
 602 */
 603static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 604{
 605        WARN_ON(!cl->prio_activity);
 606
 607        htb_deactivate_prios(q, cl);
 608        cl->prio_activity = 0;
 609}
 610
 611static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 612                       struct sk_buff **to_free)
 613{
 614        int ret;
 615        unsigned int len = qdisc_pkt_len(skb);
 616        struct htb_sched *q = qdisc_priv(sch);
 617        struct htb_class *cl = htb_classify(skb, sch, &ret);
 618
 619        if (cl == HTB_DIRECT) {
 620                /* enqueue to helper queue */
 621                if (q->direct_queue.qlen < q->direct_qlen) {
 622                        __qdisc_enqueue_tail(skb, &q->direct_queue);
 623                        q->direct_pkts++;
 624                } else {
 625                        return qdisc_drop(skb, sch, to_free);
 626                }
 627#ifdef CONFIG_NET_CLS_ACT
 628        } else if (!cl) {
 629                if (ret & __NET_XMIT_BYPASS)
 630                        qdisc_qstats_drop(sch);
 631                __qdisc_drop(skb, to_free);
 632                return ret;
 633#endif
 634        } else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
 635                                        to_free)) != NET_XMIT_SUCCESS) {
 636                if (net_xmit_drop_count(ret)) {
 637                        qdisc_qstats_drop(sch);
 638                        cl->drops++;
 639                }
 640                return ret;
 641        } else {
 642                htb_activate(q, cl);
 643        }
 644
 645        sch->qstats.backlog += len;
 646        sch->q.qlen++;
 647        return NET_XMIT_SUCCESS;
 648}
 649
 650static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 651{
 652        s64 toks = diff + cl->tokens;
 653
 654        if (toks > cl->buffer)
 655                toks = cl->buffer;
 656        toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 657        if (toks <= -cl->mbuffer)
 658                toks = 1 - cl->mbuffer;
 659
 660        cl->tokens = toks;
 661}
 662
 663static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 664{
 665        s64 toks = diff + cl->ctokens;
 666
 667        if (toks > cl->cbuffer)
 668                toks = cl->cbuffer;
 669        toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 670        if (toks <= -cl->mbuffer)
 671                toks = 1 - cl->mbuffer;
 672
 673        cl->ctokens = toks;
 674}
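
    /* Worked example (illustrative): psched_l2t_ns() gives the wire time of
     * the packet at the configured rate, so a 1500 byte packet on a class
     * rated at 12500000 bytes/s (100 Mbit/s) costs roughly 120000 ns of
     * tokens, while "diff" - the nanoseconds elapsed since cl->t_c - refills
     * the bucket, clamped to buffer/cbuffer at the top and to about -mbuffer
     * at the bottom.
     */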
 675
 676/**
 677 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 678 * @q: the priority event queue
 679 * @cl: the class to start iterating from
 680 * @level: the minimum level to account
 681 * @skb: the socket buffer
 682 *
 683 * The routine assumes that a packet "bytes" long was dequeued from leaf cl
 684 * borrowing from "level". It accounts the bytes to the ceil leaky bucket for
 685 * the leaf and all ancestors and to the rate bucket for ancestors at levels
 686 * "level" and higher. It also handles a possible change of mode resulting
 687 * from the update. Note that the mode can also improve here (MAY_BORROW to
 688 * CAN_SEND) because we can use a more precise clock than the event queue
 689 * here. In that case we remove the class from the event queue first.
 690 */
 691static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 692                             int level, struct sk_buff *skb)
 693{
 694        int bytes = qdisc_pkt_len(skb);
 695        enum htb_cmode old_mode;
 696        s64 diff;
 697
 698        while (cl) {
 699                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 700                if (cl->level >= level) {
 701                        if (cl->level == level)
 702                                cl->xstats.lends++;
 703                        htb_accnt_tokens(cl, bytes, diff);
 704                } else {
 705                        cl->xstats.borrows++;
 706                        cl->tokens += diff;     /* we moved t_c; update tokens */
 707                }
 708                htb_accnt_ctokens(cl, bytes, diff);
 709                cl->t_c = q->now;
 710
 711                old_mode = cl->cmode;
 712                diff = 0;
 713                htb_change_class_mode(q, cl, &diff);
 714                if (old_mode != cl->cmode) {
 715                        if (old_mode != HTB_CAN_SEND)
 716                                htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 717                        if (cl->cmode != HTB_CAN_SEND)
 718                                htb_add_to_wait_tree(q, cl, diff);
 719                }
 720
 721                /* update basic stats except for leaves which are already updated */
 722                if (cl->level)
 723                        bstats_update(&cl->bstats, skb);
 724
 725                cl = cl->parent;
 726        }
 727}
 728
 729/**
 730 * htb_do_events - make mode changes to classes at the level
 731 * @q: the priority event queue
 732 * @level: which wait_pq in 'q->hlevel'
 733 * @start: start jiffies
 734 *
 735 * Scans the event queue for pending events and applies them. Returns the
 736 * time of the next pending event (0 if no event is in the pq, q->now for
 737 * too many events). Note: only events with cl->pq_key <= q->now are applied.
 738 */
 739static s64 htb_do_events(struct htb_sched *q, const int level,
 740                         unsigned long start)
 741{
 742        /* don't run for longer than 2 jiffies; 2 is used instead of
 743         * 1 to simplify things when jiffy is going to be incremented
 744         * too soon
 745         */
 746        unsigned long stop_at = start + 2;
 747        struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
 748
 749        while (time_before(jiffies, stop_at)) {
 750                struct htb_class *cl;
 751                s64 diff;
 752                struct rb_node *p = rb_first(wait_pq);
 753
 754                if (!p)
 755                        return 0;
 756
 757                cl = rb_entry(p, struct htb_class, pq_node);
 758                if (cl->pq_key > q->now)
 759                        return cl->pq_key;
 760
 761                htb_safe_rb_erase(p, wait_pq);
 762                diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 763                htb_change_class_mode(q, cl, &diff);
 764                if (cl->cmode != HTB_CAN_SEND)
 765                        htb_add_to_wait_tree(q, cl, diff);
 766        }
 767
 768        /* too much load - let's continue after a break for scheduling */
 769        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 770                pr_warn("htb: too many events!\n");
 771                q->warned |= HTB_WARN_TOOMANYEVENTS;
 772        }
 773
 774        return q->now;
 775}
 776
 777/* Returns class->node+prio from the id-tree where the class's id is >= id.
 778 * Returns NULL if no such class exists.
 779 */
 780static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 781                                              u32 id)
 782{
 783        struct rb_node *r = NULL;
 784        while (n) {
 785                struct htb_class *cl =
 786                    rb_entry(n, struct htb_class, node[prio]);
 787
 788                if (id > cl->common.classid) {
 789                        n = n->rb_right;
 790                } else if (id < cl->common.classid) {
 791                        r = n;
 792                        n = n->rb_left;
 793                } else {
 794                        return n;
 795                }
 796        }
 797        return r;
 798}
 799
 800/**
 801 * htb_lookup_leaf - returns next leaf class in DRR order
 802 * @hprio: the row/feed to search at this priority
 803 * @prio: which prio in class
 804 *
 805 * Find the leaf that the current feed pointer points to.
 806 */
 807static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
 808{
 809        int i;
 810        struct {
 811                struct rb_node *root;
 812                struct rb_node **pptr;
 813                u32 *pid;
 814        } stk[TC_HTB_MAXDEPTH], *sp = stk;
 815
 816        BUG_ON(!hprio->row.rb_node);
 817        sp->root = hprio->row.rb_node;
 818        sp->pptr = &hprio->ptr;
 819        sp->pid = &hprio->last_ptr_id;
 820
 821        for (i = 0; i < 65535; i++) {
 822                if (!*sp->pptr && *sp->pid) {
 823                        /* ptr was invalidated but id is valid - try to recover
 824                         * the original or next ptr
 825                         */
 826                        *sp->pptr =
 827                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
 828                }
 829                *sp->pid = 0;   /* ptr is valid now, so remove this hint as it
 830                                 * can become out of date quickly
 831                                 */
 832                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
 833                        *sp->pptr = sp->root;
 834                        while ((*sp->pptr)->rb_left)
 835                                *sp->pptr = (*sp->pptr)->rb_left;
 836                        if (sp > stk) {
 837                                sp--;
 838                                if (!*sp->pptr) {
 839                                        WARN_ON(1);
 840                                        return NULL;
 841                                }
 842                                htb_next_rb_node(sp->pptr);
 843                        }
 844                } else {
 845                        struct htb_class *cl;
 846                        struct htb_prio *clp;
 847
 848                        cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 849                        if (!cl->level)
 850                                return cl;
 851                        clp = &cl->inner.clprio[prio];
 852                        (++sp)->root = clp->feed.rb_node;
 853                        sp->pptr = &clp->ptr;
 854                        sp->pid = &clp->last_ptr_id;
 855                }
 856        }
 857        WARN_ON(1);
 858        return NULL;
 859}
 860
 861/* dequeues a packet at the given priority and level; call only if
 862 * you are sure that there is an active class at prio/level
 863 */
 864static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
 865                                        const int level)
 866{
 867        struct sk_buff *skb = NULL;
 868        struct htb_class *cl, *start;
 869        struct htb_level *hlevel = &q->hlevel[level];
 870        struct htb_prio *hprio = &hlevel->hprio[prio];
 871
 872        /* look initial class up in the row */
 873        start = cl = htb_lookup_leaf(hprio, prio);
 874
 875        do {
 876next:
 877                if (unlikely(!cl))
 878                        return NULL;
 879
 880                /* class can be empty - it is unlikely but can be true if leaf
 881                 * qdisc drops packets in enqueue routine or if someone used
 882                 * graft operation on the leaf since last dequeue;
 883                 * simply deactivate and skip such class
 884                 */
 885                if (unlikely(cl->leaf.q->q.qlen == 0)) {
 886                        struct htb_class *next;
 887                        htb_deactivate(q, cl);
 888
 889                        /* row/level might become empty */
 890                        if ((q->row_mask[level] & (1 << prio)) == 0)
 891                                return NULL;
 892
 893                        next = htb_lookup_leaf(hprio, prio);
 894
 895                        if (cl == start)        /* fix start if we just deleted it */
 896                                start = next;
 897                        cl = next;
 898                        goto next;
 899                }
 900
 901                skb = cl->leaf.q->dequeue(cl->leaf.q);
 902                if (likely(skb != NULL))
 903                        break;
 904
 905                qdisc_warn_nonwc("htb", cl->leaf.q);
 906                htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
 907                                         &q->hlevel[0].hprio[prio].ptr);
 908                cl = htb_lookup_leaf(hprio, prio);
 909
 910        } while (cl != start);
 911
 912        if (likely(skb != NULL)) {
 913                bstats_update(&cl->bstats, skb);
 914                cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
 915                if (cl->leaf.deficit[level] < 0) {
 916                        cl->leaf.deficit[level] += cl->quantum;
 917                        htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
 918                                                 &q->hlevel[0].hprio[prio].ptr);
 919                }
 920                /* this used to be after charge_class but this constellation
 921                 * gives us slightly better performance
 922                 */
 923                if (!cl->leaf.q->q.qlen)
 924                        htb_deactivate(q, cl);
 925                htb_charge_class(q, cl, level, skb);
 926        }
 927        return skb;
 928}
 929
 930static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 931{
 932        struct sk_buff *skb;
 933        struct htb_sched *q = qdisc_priv(sch);
 934        int level;
 935        s64 next_event;
 936        unsigned long start_at;
 937
 938        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
 939        skb = __qdisc_dequeue_head(&q->direct_queue);
 940        if (skb != NULL) {
 941ok:
 942                qdisc_bstats_update(sch, skb);
 943                qdisc_qstats_backlog_dec(sch, skb);
 944                sch->q.qlen--;
 945                return skb;
 946        }
 947
 948        if (!sch->q.qlen)
 949                goto fin;
 950        q->now = ktime_get_ns();
 951        start_at = jiffies;
 952
 953        next_event = q->now + 5LLU * NSEC_PER_SEC;
 954
 955        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 956                /* common case optimization - skip event handler quickly */
 957                int m;
 958                s64 event = q->near_ev_cache[level];
 959
 960                if (q->now >= event) {
 961                        event = htb_do_events(q, level, start_at);
 962                        if (!event)
 963                                event = q->now + NSEC_PER_SEC;
 964                        q->near_ev_cache[level] = event;
 965                }
 966
 967                if (next_event > event)
 968                        next_event = event;
 969
 970                m = ~q->row_mask[level];
 971                while (m != (int)(-1)) {
 972                        int prio = ffz(m);
 973
 974                        m |= 1 << prio;
 975                        skb = htb_dequeue_tree(q, prio, level);
 976                        if (likely(skb != NULL))
 977                                goto ok;
 978                }
 979        }
 980        if (likely(next_event > q->now))
 981                qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
 982        else
 983                schedule_work(&q->work);
 984fin:
 985        return skb;
 986}
 987
 988/* reset all classes */
 989/* always called under BH & queue lock */
 990static void htb_reset(struct Qdisc *sch)
 991{
 992        struct htb_sched *q = qdisc_priv(sch);
 993        struct htb_class *cl;
 994        unsigned int i;
 995
 996        for (i = 0; i < q->clhash.hashsize; i++) {
 997                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 998                        if (cl->level)
 999                                memset(&cl->inner, 0, sizeof(cl->inner));
1000                        else {
1001                                if (cl->leaf.q && !q->offload)
1002                                        qdisc_reset(cl->leaf.q);
1003                        }
1004                        cl->prio_activity = 0;
1005                        cl->cmode = HTB_CAN_SEND;
1006                }
1007        }
1008        qdisc_watchdog_cancel(&q->watchdog);
1009        __qdisc_reset_queue(&q->direct_queue);
1010        sch->q.qlen = 0;
1011        sch->qstats.backlog = 0;
1012        memset(q->hlevel, 0, sizeof(q->hlevel));
1013        memset(q->row_mask, 0, sizeof(q->row_mask));
1014}
1015
1016static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1017        [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1018        [TCA_HTB_INIT]  = { .len = sizeof(struct tc_htb_glob) },
1019        [TCA_HTB_CTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1020        [TCA_HTB_RTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1021        [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1022        [TCA_HTB_RATE64] = { .type = NLA_U64 },
1023        [TCA_HTB_CEIL64] = { .type = NLA_U64 },
1024        [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1025};
1026
1027static void htb_work_func(struct work_struct *work)
1028{
1029        struct htb_sched *q = container_of(work, struct htb_sched, work);
1030        struct Qdisc *sch = q->watchdog.qdisc;
1031
1032        rcu_read_lock();
1033        __netif_schedule(qdisc_root(sch));
1034        rcu_read_unlock();
1035}
1036
1037static void htb_set_lockdep_class_child(struct Qdisc *q)
1038{
1039        static struct lock_class_key child_key;
1040
1041        lockdep_set_class(qdisc_lock(q), &child_key);
1042}
1043
1044static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1045{
1046        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1047}
1048
1049static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1050                    struct netlink_ext_ack *extack)
1051{
1052        struct net_device *dev = qdisc_dev(sch);
1053        struct tc_htb_qopt_offload offload_opt;
1054        struct htb_sched *q = qdisc_priv(sch);
1055        struct nlattr *tb[TCA_HTB_MAX + 1];
1056        struct tc_htb_glob *gopt;
1057        unsigned int ntx;
1058        bool offload;
1059        int err;
1060
1061        qdisc_watchdog_init(&q->watchdog, sch);
1062        INIT_WORK(&q->work, htb_work_func);
1063
1064        if (!opt)
1065                return -EINVAL;
1066
1067        err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1068        if (err)
1069                return err;
1070
1071        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1072                                          NULL);
1073        if (err < 0)
1074                return err;
1075
1076        if (!tb[TCA_HTB_INIT])
1077                return -EINVAL;
1078
1079        gopt = nla_data(tb[TCA_HTB_INIT]);
1080        if (gopt->version != HTB_VER >> 16)
1081                return -EINVAL;
1082
1083        offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1084
1085        if (offload) {
1086                if (sch->parent != TC_H_ROOT)
1087                        return -EOPNOTSUPP;
1088
1089                if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
1090                        return -EOPNOTSUPP;
1091
1092                q->num_direct_qdiscs = dev->real_num_tx_queues;
1093                q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1094                                           sizeof(*q->direct_qdiscs),
1095                                           GFP_KERNEL);
1096                if (!q->direct_qdiscs)
1097                        return -ENOMEM;
1098        }
1099
1100        err = qdisc_class_hash_init(&q->clhash);
1101        if (err < 0)
1102                goto err_free_direct_qdiscs;
1103
1104        qdisc_skb_head_init(&q->direct_queue);
1105
1106        if (tb[TCA_HTB_DIRECT_QLEN])
1107                q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1108        else
1109                q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1110
1111        if ((q->rate2quantum = gopt->rate2quantum) < 1)
1112                q->rate2quantum = 1;
1113        q->defcls = gopt->defcls;
1114
1115        if (!offload)
1116                return 0;
1117
1118        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1119                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1120                struct Qdisc *qdisc;
1121
1122                qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1123                                          TC_H_MAKE(sch->handle, 0), extack);
1124                if (!qdisc) {
1125                        err = -ENOMEM;
1126                        goto err_free_qdiscs;
1127                }
1128
1129                htb_set_lockdep_class_child(qdisc);
1130                q->direct_qdiscs[ntx] = qdisc;
1131                qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1132        }
1133
1134        sch->flags |= TCQ_F_MQROOT;
1135
1136        offload_opt = (struct tc_htb_qopt_offload) {
1137                .command = TC_HTB_CREATE,
1138                .parent_classid = TC_H_MAJ(sch->handle) >> 16,
1139                .classid = TC_H_MIN(q->defcls),
1140                .extack = extack,
1141        };
1142        err = htb_offload(dev, &offload_opt);
1143        if (err)
1144                goto err_free_qdiscs;
1145
1146        /* Defer this assignment, so that htb_destroy skips offload-related
1147         * parts (especially calling ndo_setup_tc) on errors.
1148         */
1149        q->offload = true;
1150
1151        return 0;
1152
1153err_free_qdiscs:
1154        for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
1155             ntx++)
1156                qdisc_put(q->direct_qdiscs[ntx]);
1157
1158        qdisc_class_hash_destroy(&q->clhash);
1159        /* Prevent use-after-free and double-free when htb_destroy gets called.
1160         */
1161        q->clhash.hash = NULL;
1162        q->clhash.hashsize = 0;
1163
1164err_free_direct_qdiscs:
1165        kfree(q->direct_qdiscs);
1166        q->direct_qdiscs = NULL;
1167        return err;
1168}
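
    /* Example (illustrative, assuming a driver that implements
     * TC_SETUP_QDISC_HTB, e.g. recent mlx5e): "tc qdisc replace dev eth0
     * root handle 1: htb offload" takes the offload branch above, creating
     * one pfifo per real TX queue for direct traffic before issuing
     * TC_HTB_CREATE to the device.
     */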
1169
1170static void htb_attach_offload(struct Qdisc *sch)
1171{
1172        struct net_device *dev = qdisc_dev(sch);
1173        struct htb_sched *q = qdisc_priv(sch);
1174        unsigned int ntx;
1175
1176        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1177                struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1178
1179                old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1180                qdisc_put(old);
1181                qdisc_hash_add(qdisc, false);
1182        }
1183        for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1184                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1185                struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1186
1187                qdisc_put(old);
1188        }
1189
1190        kfree(q->direct_qdiscs);
1191        q->direct_qdiscs = NULL;
1192}
1193
1194static void htb_attach_software(struct Qdisc *sch)
1195{
1196        struct net_device *dev = qdisc_dev(sch);
1197        unsigned int ntx;
1198
1199        /* Resemble qdisc_graft behavior. */
1200        for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1201                struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1202                struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1203
1204                qdisc_refcount_inc(sch);
1205
1206                qdisc_put(old);
1207        }
1208}
1209
1210static void htb_attach(struct Qdisc *sch)
1211{
1212        struct htb_sched *q = qdisc_priv(sch);
1213
1214        if (q->offload)
1215                htb_attach_offload(sch);
1216        else
1217                htb_attach_software(sch);
1218}
1219
1220static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1221{
1222        struct htb_sched *q = qdisc_priv(sch);
1223        struct nlattr *nest;
1224        struct tc_htb_glob gopt;
1225
1226        if (q->offload)
1227                sch->flags |= TCQ_F_OFFLOADED;
1228        else
1229                sch->flags &= ~TCQ_F_OFFLOADED;
1230
1231        sch->qstats.overlimits = q->overlimits;
1232        /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1233         * no change can happen on the qdisc parameters.
1234         */
1235
1236        gopt.direct_pkts = q->direct_pkts;
1237        gopt.version = HTB_VER;
1238        gopt.rate2quantum = q->rate2quantum;
1239        gopt.defcls = q->defcls;
1240        gopt.debug = 0;
1241
1242        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1243        if (nest == NULL)
1244                goto nla_put_failure;
1245        if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1246            nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1247                goto nla_put_failure;
1248        if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1249                goto nla_put_failure;
1250
1251        return nla_nest_end(skb, nest);
1252
1253nla_put_failure:
1254        nla_nest_cancel(skb, nest);
1255        return -1;
1256}
1257
1258static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1259                          struct sk_buff *skb, struct tcmsg *tcm)
1260{
1261        struct htb_class *cl = (struct htb_class *)arg;
1262        struct htb_sched *q = qdisc_priv(sch);
1263        struct nlattr *nest;
1264        struct tc_htb_opt opt;
1265
1266        /* It's safe not to acquire the qdisc lock. As we hold RTNL,
1267         * no change can happen on the class parameters.
1268         */
1269        tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1270        tcm->tcm_handle = cl->common.classid;
1271        if (!cl->level && cl->leaf.q)
1272                tcm->tcm_info = cl->leaf.q->handle;
1273
1274        nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1275        if (nest == NULL)
1276                goto nla_put_failure;
1277
1278        memset(&opt, 0, sizeof(opt));
1279
1280        psched_ratecfg_getrate(&opt.rate, &cl->rate);
1281        opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1282        psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1283        opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1284        opt.quantum = cl->quantum;
1285        opt.prio = cl->prio;
1286        opt.level = cl->level;
1287        if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1288                goto nla_put_failure;
1289        if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1290                goto nla_put_failure;
1291        if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1292            nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1293                              TCA_HTB_PAD))
1294                goto nla_put_failure;
1295        if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1296            nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1297                              TCA_HTB_PAD))
1298                goto nla_put_failure;
1299
1300        return nla_nest_end(skb, nest);
1301
1302nla_put_failure:
1303        nla_nest_cancel(skb, nest);
1304        return -1;
1305}
1306
1307static void htb_offload_aggregate_stats(struct htb_sched *q,
1308                                        struct htb_class *cl)
1309{
1310        struct htb_class *c;
1311        unsigned int i;
1312
1313        memset(&cl->bstats, 0, sizeof(cl->bstats));
1314
1315        for (i = 0; i < q->clhash.hashsize; i++) {
1316                hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1317                        struct htb_class *p = c;
1318
1319                        while (p && p->level < cl->level)
1320                                p = p->parent;
1321
1322                        if (p != cl)
1323                                continue;
1324
1325                        cl->bstats.bytes += c->bstats_bias.bytes;
1326                        cl->bstats.packets += c->bstats_bias.packets;
1327                        if (c->level == 0) {
1328                                cl->bstats.bytes += c->leaf.q->bstats.bytes;
1329                                cl->bstats.packets += c->leaf.q->bstats.packets;
1330                        }
1331                }
1332        }
1333}
1334
1335static int
1336htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1337{
1338        struct htb_class *cl = (struct htb_class *)arg;
1339        struct htb_sched *q = qdisc_priv(sch);
1340        struct gnet_stats_queue qs = {
1341                .drops = cl->drops,
1342                .overlimits = cl->overlimits,
1343        };
1344        __u32 qlen = 0;
1345
1346        if (!cl->level && cl->leaf.q)
1347                qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1348
1349        cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1350                                    INT_MIN, INT_MAX);
1351        cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1352                                     INT_MIN, INT_MAX);
1353
1354        if (q->offload) {
1355                if (!cl->level) {
1356                        if (cl->leaf.q)
1357                                cl->bstats = cl->leaf.q->bstats;
1358                        else
1359                                memset(&cl->bstats, 0, sizeof(cl->bstats));
1360                        cl->bstats.bytes += cl->bstats_bias.bytes;
1361                        cl->bstats.packets += cl->bstats_bias.packets;
1362                } else {
1363                        htb_offload_aggregate_stats(q, cl);
1364                }
1365        }
1366
1367        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1368                                  d, NULL, &cl->bstats) < 0 ||
1369            gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1370            gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1371                return -1;
1372
1373        return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1374}
1375
1376static struct netdev_queue *
1377htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1378{
1379        struct net_device *dev = qdisc_dev(sch);
1380        struct tc_htb_qopt_offload offload_opt;
1381        struct htb_sched *q = qdisc_priv(sch);
1382        int err;
1383
1384        if (!q->offload)
1385                return sch->dev_queue;
1386
1387        offload_opt = (struct tc_htb_qopt_offload) {
1388                .command = TC_HTB_LEAF_QUERY_QUEUE,
1389                .classid = TC_H_MIN(tcm->tcm_parent),
1390        };
1391        err = htb_offload(dev, &offload_opt);
1392        if (err || offload_opt.qid >= dev->num_tx_queues)
1393                return NULL;
1394        return netdev_get_tx_queue(dev, offload_opt.qid);
1395}
1396
1397static struct Qdisc *
1398htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1399{
1400        struct net_device *dev = dev_queue->dev;
1401        struct Qdisc *old_q;
1402
1403        if (dev->flags & IFF_UP)
1404                dev_deactivate(dev);
1405        old_q = dev_graft_qdisc(dev_queue, new_q);
1406        if (new_q)
1407                new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1408        if (dev->flags & IFF_UP)
1409                dev_activate(dev);
1410
1411        return old_q;
1412}
1413
1414static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
1415{
1416        struct netdev_queue *queue_old, *queue_new;
1417        struct net_device *dev = qdisc_dev(sch);
1418        struct Qdisc *qdisc;
1419
1420        queue_old = netdev_get_tx_queue(dev, qid_old);
1421        queue_new = netdev_get_tx_queue(dev, qid_new);
1422
1423        if (dev->flags & IFF_UP)
1424                dev_deactivate(dev);
1425        qdisc = dev_graft_qdisc(queue_old, NULL);
1426        qdisc->dev_queue = queue_new;
1427        qdisc = dev_graft_qdisc(queue_new, qdisc);
1428        if (dev->flags & IFF_UP)
1429                dev_activate(dev);
1430
1431        WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1432}
1433
1434static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1435                     struct Qdisc **old, struct netlink_ext_ack *extack)
1436{
1437        struct netdev_queue *dev_queue = sch->dev_queue;
1438        struct htb_class *cl = (struct htb_class *)arg;
1439        struct htb_sched *q = qdisc_priv(sch);
1440        struct Qdisc *old_q;
1441
1442        if (cl->level)
1443                return -EINVAL;
1444
1445        if (q->offload) {
1446                dev_queue = cl->leaf.q->dev_queue;
1447                WARN_ON(new && new->dev_queue != dev_queue);
1448        }
1449
1450        if (!new) {
1451                new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1452                                        cl->common.classid, extack);
1453                if (!new)
1454                        return -ENOBUFS;
1455        }
1456
1457        if (q->offload) {
1458                htb_set_lockdep_class_child(new);
1459                /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1460                qdisc_refcount_inc(new);
1461                old_q = htb_graft_helper(dev_queue, new);
1462        }
1463
1464        *old = qdisc_replace(sch, new, &cl->leaf.q);
1465
1466        if (q->offload) {
1467                WARN_ON(old_q != *old);
1468                qdisc_put(old_q);
1469        }
1470
1471        return 0;
1472}
1473
1474static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1475{
1476        struct htb_class *cl = (struct htb_class *)arg;
1477        return !cl->level ? cl->leaf.q : NULL;
1478}
1479
1480static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1481{
1482        struct htb_class *cl = (struct htb_class *)arg;
1483
1484        htb_deactivate(qdisc_priv(sch), cl);
1485}
1486
1487static inline int htb_parent_last_child(struct htb_class *cl)
1488{
1489        if (!cl->parent)
1490                /* the root class */
1491                return 0;
1492        if (cl->parent->children > 1)
1493                /* not the last child */
1494                return 0;
1495        return 1;
1496}
1497
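    /* cl was the last child of its parent: turn the parent back into a leaf,
     * attach new_q (or noop_qdisc) as its qdisc, refill its tokens and put
     * it back into HTB_CAN_SEND mode.
     */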
1498static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1499                               struct Qdisc *new_q)
1500{
1501        struct htb_sched *q = qdisc_priv(sch);
1502        struct htb_class *parent = cl->parent;
1503
1504        WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1505
1506        if (parent->cmode != HTB_CAN_SEND)
1507                htb_safe_rb_erase(&parent->pq_node,
1508                                  &q->hlevel[parent->level].wait_pq);
1509
1510        parent->level = 0;
1511        memset(&parent->inner, 0, sizeof(parent->inner));
1512        parent->leaf.q = new_q ? new_q : &noop_qdisc;
1513        parent->tokens = parent->buffer;
1514        parent->ctokens = parent->cbuffer;
1515        parent->t_c = ktime_get_ns();
1516        parent->cmode = HTB_CAN_SEND;
1517}
1518
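    /* Offload counterpart of the conversion above: graft new_q onto the TX
     * queue that backed the removed leaf, taking the extra reference that
     * dev_queue->qdisc will hold.
     */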
1519static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1520                                       struct netdev_queue *dev_queue,
1521                                       struct Qdisc *new_q)
1522{
1523        struct Qdisc *old_q;
1524
1525        /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1526        if (new_q)
1527                qdisc_refcount_inc(new_q);
1528        old_q = htb_graft_helper(dev_queue, new_q);
1529        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1530}
1531
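    /* Tell the driver that a leaf class is going away.  Depending on whether
     * it is the last child and whether the whole qdisc is being destroyed,
     * this maps to TC_HTB_LEAF_DEL, TC_HTB_LEAF_DEL_LAST or
     * TC_HTB_LEAF_DEL_LAST_FORCE.  The leaf's byte/packet stats are folded
     * into the parent's bstats_bias so they are not lost with the leaf.
     */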
1532static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1533                                     bool last_child, bool destroying,
1534                                     struct netlink_ext_ack *extack)
1535{
1536        struct tc_htb_qopt_offload offload_opt;
1537        struct Qdisc *q = cl->leaf.q;
1538        struct Qdisc *old = NULL;
1539        int err;
1540
1541        if (cl->level)
1542                return -EINVAL;
1543
1544        WARN_ON(!q);
1545        if (!destroying) {
1546                /* On destroy of the whole HTB, two cases are possible:
1547                 * 1. q is a normal qdisc, but q->dev_queue has a noop qdisc.
1548                 * 2. q is a noop qdisc (for nodes that were inner),
1549                 *    and q->dev_queue is noop_netdev_queue.
1550                 */
1551                old = htb_graft_helper(q->dev_queue, NULL);
1552                WARN_ON(!old);
1553                WARN_ON(old != q);
1554        }
1555
1556        if (cl->parent) {
1557                cl->parent->bstats_bias.bytes += q->bstats.bytes;
1558                cl->parent->bstats_bias.packets += q->bstats.packets;
1559        }
1560
1561        offload_opt = (struct tc_htb_qopt_offload) {
1562                .command = !last_child ? TC_HTB_LEAF_DEL :
1563                           destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1564                           TC_HTB_LEAF_DEL_LAST,
1565                .classid = cl->common.classid,
1566                .extack = extack,
1567        };
1568        err = htb_offload(qdisc_dev(sch), &offload_opt);
1569
1570        if (!err || destroying)
1571                qdisc_put(old);
1572        else
1573                htb_graft_helper(q->dev_queue, old);
1574
1575        if (last_child)
1576                return err;
1577
1578        if (!err && offload_opt.moved_qid != 0) {
1579                if (destroying)
1580                        q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
1581                                                           offload_opt.qid);
1582                else
1583                        htb_offload_move_qdisc(sch, offload_opt.moved_qid,
1584                                               offload_opt.qid);
1585        }
1586
1587        return err;
1588}
1589
1590static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1591{
1592        if (!cl->level) {
1593                WARN_ON(!cl->leaf.q);
1594                qdisc_put(cl->leaf.q);
1595        }
1596        gen_kill_estimator(&cl->rate_est);
1597        tcf_block_put(cl->block);
1598        kfree(cl);
1599}
1600
1601static void htb_destroy(struct Qdisc *sch)
1602{
1603        struct net_device *dev = qdisc_dev(sch);
1604        struct tc_htb_qopt_offload offload_opt;
1605        struct htb_sched *q = qdisc_priv(sch);
1606        struct hlist_node *next;
1607        bool nonempty, changed;
1608        struct htb_class *cl;
1609        unsigned int i;
1610
1611        cancel_work_sync(&q->work);
1612        qdisc_watchdog_cancel(&q->watchdog);
1613        /* This line used to be after the htb_destroy_class call below,
1614         * and surprisingly it worked in 2.4. But it must precede it,
1615         * because filters need their target class alive to be able to call
1616         * unbind_filter on it (without an Oops).
1617         */
1618        tcf_block_put(q->block);
1619
1620        for (i = 0; i < q->clhash.hashsize; i++) {
1621                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1622                        tcf_block_put(cl->block);
1623                        cl->block = NULL;
1624                }
1625        }
1626
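            /* With offload, classes are torn down leaves-first: each pass
             * removes only level-0 classes (turning last-child parents back
             * into leaves) and the loop repeats until a pass makes no
             * change.  Without offload a single pass frees every class.
             */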
1627        do {
1628                nonempty = false;
1629                changed = false;
1630                for (i = 0; i < q->clhash.hashsize; i++) {
1631                        hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1632                                                  common.hnode) {
1633                                bool last_child;
1634
1635                                if (!q->offload) {
1636                                        htb_destroy_class(sch, cl);
1637                                        continue;
1638                                }
1639
1640                                nonempty = true;
1641
1642                                if (cl->level)
1643                                        continue;
1644
1645                                changed = true;
1646
1647                                last_child = htb_parent_last_child(cl);
1648                                htb_destroy_class_offload(sch, cl, last_child,
1649                                                          true, NULL);
1650                                qdisc_class_hash_remove(&q->clhash,
1651                                                        &cl->common);
1652                                if (cl->parent)
1653                                        cl->parent->children--;
1654                                if (last_child)
1655                                        htb_parent_to_leaf(sch, cl, NULL);
1656                                htb_destroy_class(sch, cl);
1657                        }
1658                }
1659        } while (changed);
1660        WARN_ON(nonempty);
1661
1662        qdisc_class_hash_destroy(&q->clhash);
1663        __qdisc_reset_queue(&q->direct_queue);
1664
1665        if (!q->offload)
1666                return;
1667
1668        offload_opt = (struct tc_htb_qopt_offload) {
1669                .command = TC_HTB_DESTROY,
1670        };
1671        htb_offload(dev, &offload_opt);
1672
1673        if (!q->direct_qdiscs)
1674                return;
1675        for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1676                qdisc_put(q->direct_qdiscs[i]);
1677        kfree(q->direct_qdiscs);
1678}
1679
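    /* Delete a class.  Refused while the class still has children or bound
     * filters.  If the class is its parent's last child, the parent is
     * turned back into a leaf and gets a fresh default pfifo.
     */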
1680static int htb_delete(struct Qdisc *sch, unsigned long arg,
1681                      struct netlink_ext_ack *extack)
1682{
1683        struct htb_sched *q = qdisc_priv(sch);
1684        struct htb_class *cl = (struct htb_class *)arg;
1685        struct Qdisc *new_q = NULL;
1686        int last_child = 0;
1687        int err;
1688
1689        /* TODO: why don't we allow deleting a subtree? References? Does the
1690         * tc subsystem guarantee us that in htb_destroy it holds no class
1691         * refs, so that we can remove children safely there?
1692         */
1693        if (cl->children || cl->filter_cnt)
1694                return -EBUSY;
1695
1696        if (!cl->level && htb_parent_last_child(cl))
1697                last_child = 1;
1698
1699        if (q->offload) {
1700                err = htb_destroy_class_offload(sch, cl, last_child, false,
1701                                                extack);
1702                if (err)
1703                        return err;
1704        }
1705
1706        if (last_child) {
1707                struct netdev_queue *dev_queue;
1708
1709                dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
1710                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1711                                          cl->parent->common.classid,
1712                                          NULL);
1713                if (q->offload) {
1714                        if (new_q)
1715                                htb_set_lockdep_class_child(new_q);
1716                        htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1717                }
1718        }
1719
1720        sch_tree_lock(sch);
1721
1722        if (!cl->level)
1723                qdisc_purge_queue(cl->leaf.q);
1724
1725        /* delete from hash and active; remainder in destroy_class */
1726        qdisc_class_hash_remove(&q->clhash, &cl->common);
1727        if (cl->parent)
1728                cl->parent->children--;
1729
1730        if (cl->prio_activity)
1731                htb_deactivate(q, cl);
1732
1733        if (cl->cmode != HTB_CAN_SEND)
1734                htb_safe_rb_erase(&cl->pq_node,
1735                                  &q->hlevel[cl->level].wait_pq);
1736
1737        if (last_child)
1738                htb_parent_to_leaf(sch, cl, new_q);
1739
1740        sch_tree_unlock(sch);
1741
1742        htb_destroy_class(sch, cl);
1743        return 0;
1744}
1745
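    /* Create a new class or reconfigure an existing one; this backs commands
     * such as (illustrative):
     *   tc class add dev eth0 parent 1: classid 1:10 htb rate 100mbit ceil 200mbit
     * A zero *arg means a new class is being created, otherwise the existing
     * class is modified under sch_tree_lock.
     */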
1746static int htb_change_class(struct Qdisc *sch, u32 classid,
1747                            u32 parentid, struct nlattr **tca,
1748                            unsigned long *arg, struct netlink_ext_ack *extack)
1749{
1750        int err = -EINVAL;
1751        struct htb_sched *q = qdisc_priv(sch);
1752        struct htb_class *cl = (struct htb_class *)*arg, *parent;
1753        struct tc_htb_qopt_offload offload_opt;
1754        struct nlattr *opt = tca[TCA_OPTIONS];
1755        struct nlattr *tb[TCA_HTB_MAX + 1];
1756        struct Qdisc *parent_qdisc = NULL;
1757        struct netdev_queue *dev_queue;
1758        struct tc_htb_opt *hopt;
1759        u64 rate64, ceil64;
1760        int warn = 0;
1761
1762        /* extract all subattrs from opt attr */
1763        if (!opt)
1764                goto failure;
1765
1766        err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1767                                          NULL);
1768        if (err < 0)
1769                goto failure;
1770
1771        err = -EINVAL;
1772        if (tb[TCA_HTB_PARMS] == NULL)
1773                goto failure;
1774
1775        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1776
1777        hopt = nla_data(tb[TCA_HTB_PARMS]);
1778        if (!hopt->rate.rate || !hopt->ceil.rate)
1779                goto failure;
1780
1781        /* Keep backward compatibility with rate_table based iproute2 tc */
1782        if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1783                qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1784                                              NULL));
1785
1786        if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1787                qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1788                                              NULL));
1789
1790        rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1791        ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1792
1793        if (!cl) {              /* new class */
1794                struct net_device *dev = qdisc_dev(sch);
1795                struct Qdisc *new_q, *old_q;
1796                int prio;
1797                struct {
1798                        struct nlattr           nla;
1799                        struct gnet_estimator   opt;
1800                } est = {
1801                        .nla = {
1802                                .nla_len        = nla_attr_size(sizeof(est.opt)),
1803                                .nla_type       = TCA_RATE,
1804                        },
1805                        .opt = {
1806                                /* 4s interval, 16s averaging constant */
1807                                .interval       = 2,
1808                                .ewma_log       = 2,
1809                        },
1810                };
1811
1812                /* check for valid classid */
1813                if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1814                    htb_find(classid, sch))
1815                        goto failure;
1816
1817                /* check maximal depth */
1818                if (parent && parent->parent && parent->parent->level < 2) {
1819                        pr_err("htb: tree is too deep\n");
1820                        goto failure;
1821                }
1822                err = -ENOBUFS;
1823                cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1824                if (!cl)
1825                        goto failure;
1826
1827                err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1828                if (err) {
1829                        kfree(cl);
1830                        goto failure;
1831                }
1832                if (htb_rate_est || tca[TCA_RATE]) {
1833                        err = gen_new_estimator(&cl->bstats, NULL,
1834                                                &cl->rate_est,
1835                                                NULL,
1836                                                qdisc_root_sleeping_running(sch),
1837                                                tca[TCA_RATE] ? : &est.nla);
1838                        if (err)
1839                                goto err_block_put;
1840                }
1841
1842                cl->children = 0;
1843                RB_CLEAR_NODE(&cl->pq_node);
1844
1845                for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1846                        RB_CLEAR_NODE(&cl->node[prio]);
1847
1848                cl->common.classid = classid;
1849
1850                /* Make sure nothing interrupts us between the two
1851                 * ndo_setup_tc calls.
1852                 */
1853                ASSERT_RTNL();
1854
1855                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1856                 * which can't be used inside of sch_tree_lock
1857                 * -- thanks to Karlis Peisenieks
1858                 */
1859                if (!q->offload) {
1860                        dev_queue = sch->dev_queue;
1861                } else if (!(parent && !parent->level)) {
1862                        /* Assign a dev_queue to this classid. */
1863                        offload_opt = (struct tc_htb_qopt_offload) {
1864                                .command = TC_HTB_LEAF_ALLOC_QUEUE,
1865                                .classid = cl->common.classid,
1866                                .parent_classid = parent ?
1867                                        TC_H_MIN(parent->common.classid) :
1868                                        TC_HTB_CLASSID_ROOT,
1869                                .rate = max_t(u64, hopt->rate.rate, rate64),
1870                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1871                                .extack = extack,
1872                        };
1873                        err = htb_offload(dev, &offload_opt);
1874                        if (err) {
1875                                pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1876                                       err);
1877                                goto err_kill_estimator;
1878                        }
1879                        dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1880                } else { /* First child. */
1881                        dev_queue = parent->leaf.q->dev_queue;
1882                        old_q = htb_graft_helper(dev_queue, NULL);
1883                        WARN_ON(old_q != parent->leaf.q);
1884                        offload_opt = (struct tc_htb_qopt_offload) {
1885                                .command = TC_HTB_LEAF_TO_INNER,
1886                                .classid = cl->common.classid,
1887                                .parent_classid =
1888                                        TC_H_MIN(parent->common.classid),
1889                                .rate = max_t(u64, hopt->rate.rate, rate64),
1890                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1891                                .extack = extack,
1892                        };
1893                        err = htb_offload(dev, &offload_opt);
1894                        if (err) {
1895                                pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1896                                       err);
1897                                htb_graft_helper(dev_queue, old_q);
1898                                goto err_kill_estimator;
1899                        }
1900                        parent->bstats_bias.bytes += old_q->bstats.bytes;
1901                        parent->bstats_bias.packets += old_q->bstats.packets;
1902                        qdisc_put(old_q);
1903                }
1904                new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1905                                          classid, NULL);
1906                if (q->offload) {
1907                        if (new_q) {
1908                                htb_set_lockdep_class_child(new_q);
1909                                /* One ref for cl->leaf.q, the other for
1910                                 * dev_queue->qdisc.
1911                                 */
1912                                qdisc_refcount_inc(new_q);
1913                        }
1914                        old_q = htb_graft_helper(dev_queue, new_q);
1915                        /* No qdisc_put needed. */
1916                        WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1917                }
1918                sch_tree_lock(sch);
1919                if (parent && !parent->level) {
1920                        /* turn parent into inner node */
1921                        qdisc_purge_queue(parent->leaf.q);
1922                        parent_qdisc = parent->leaf.q;
1923                        if (parent->prio_activity)
1924                                htb_deactivate(q, parent);
1925
1926                        /* remove from the event queue because of the level change */
1927                        if (parent->cmode != HTB_CAN_SEND) {
1928                                htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1929                                parent->cmode = HTB_CAN_SEND;
1930                        }
1931                        parent->level = (parent->parent ? parent->parent->level
1932                                         : TC_HTB_MAXDEPTH) - 1;
1933                        memset(&parent->inner, 0, sizeof(parent->inner));
1934                }
1935
1936                /* the leaf (this class) needs an elementary qdisc */
1937                cl->leaf.q = new_q ? new_q : &noop_qdisc;
1938
1939                cl->parent = parent;
1940
1941                /* set class to be in HTB_CAN_SEND state */
1942                cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1943                cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1944                cl->mbuffer = 60ULL * NSEC_PER_SEC;     /* 1min */
1945                cl->t_c = ktime_get_ns();
1946                cl->cmode = HTB_CAN_SEND;
1947
1948                /* attach to the hash list and parent's family */
1949                qdisc_class_hash_insert(&q->clhash, &cl->common);
1950                if (parent)
1951                        parent->children++;
1952                if (cl->leaf.q != &noop_qdisc)
1953                        qdisc_hash_add(cl->leaf.q, true);
1954        } else {
1955                if (tca[TCA_RATE]) {
1956                        err = gen_replace_estimator(&cl->bstats, NULL,
1957                                                    &cl->rate_est,
1958                                                    NULL,
1959                                                    qdisc_root_sleeping_running(sch),
1960                                                    tca[TCA_RATE]);
1961                        if (err)
1962                                return err;
1963                }
1964
1965                if (q->offload) {
1966                        struct net_device *dev = qdisc_dev(sch);
1967
1968                        offload_opt = (struct tc_htb_qopt_offload) {
1969                                .command = TC_HTB_NODE_MODIFY,
1970                                .classid = cl->common.classid,
1971                                .rate = max_t(u64, hopt->rate.rate, rate64),
1972                                .ceil = max_t(u64, hopt->ceil.rate, ceil64),
1973                                .extack = extack,
1974                        };
1975                        err = htb_offload(dev, &offload_opt);
1976                        if (err)
1977                                /* Estimator was replaced, and rollback may fail
1978                                 * as well, so we don't try to recover it, and
1979                                 * the estimator won't work properly with the
1980                                 * offload anyway, because bstats are updated
1981                                 * only when the stats are queried.
1982                                 */
1983                                return err;
1984                }
1985
1986                sch_tree_lock(sch);
1987        }
1988
1989        psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1990        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1991
1992        /* there used to be a nasty bug here: we have to check that the node
1993         * is really a leaf before changing cl->leaf!
1994         */
1995        if (!cl->level) {
1996                u64 quantum = cl->rate.rate_bytes_ps;
1997
1998                do_div(quantum, q->rate2quantum);
1999                cl->quantum = min_t(u64, quantum, INT_MAX);
2000
2001                if (!hopt->quantum && cl->quantum < 1000) {
2002                        warn = -1;
2003                        cl->quantum = 1000;
2004                }
2005                if (!hopt->quantum && cl->quantum > 200000) {
2006                        warn = 1;
2007                        cl->quantum = 200000;
2008                }
2009                if (hopt->quantum)
2010                        cl->quantum = hopt->quantum;
2011                if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2012                        cl->prio = TC_HTB_NUMPRIO - 1;
2013        }
2014
2015        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2016        cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2017
2018        sch_tree_unlock(sch);
2019        qdisc_put(parent_qdisc);
2020
2021        if (warn)
2022                pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2023                            cl->common.classid, (warn == -1 ? "small" : "big"));
2024
2025        qdisc_class_hash_grow(sch, &q->clhash);
2026
2027        *arg = (unsigned long)cl;
2028        return 0;
2029
2030err_kill_estimator:
2031        gen_kill_estimator(&cl->rate_est);
2032err_block_put:
2033        tcf_block_put(cl->block);
2034        kfree(cl);
2035failure:
2036        return err;
2037}
2038
2039static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2040                                       struct netlink_ext_ack *extack)
2041{
2042        struct htb_sched *q = qdisc_priv(sch);
2043        struct htb_class *cl = (struct htb_class *)arg;
2044
2045        return cl ? cl->block : q->block;
2046}
2047
2048static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2049                                     u32 classid)
2050{
2051        struct htb_class *cl = htb_find(classid, sch);
2052
2053        /*if (cl && !cl->level) return 0;
2054         * The line above used to be there to prevent attaching filters to
2055         * leaves. But at least the tc_index filter uses this just to get the
2056         * class for other reasons, so we have to allow it.
2057         * ----
2058         * 19.6.2002: As Werner explained, it is OK - bind_filter is just
2059         * another way to "lock" the class - unlike "get", this lock can
2060         * be broken by the class during destroy, IIUC.
2061         */
2062        if (cl)
2063                cl->filter_cnt++;
2064        return (unsigned long)cl;
2065}
2066
2067static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2068{
2069        struct htb_class *cl = (struct htb_class *)arg;
2070
2071        if (cl)
2072                cl->filter_cnt--;
2073}
2074
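    /* Walk all classes for dumps, honouring the walker's skip/count/stop
     * protocol used by the tc class dump machinery.
     */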
2075static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2076{
2077        struct htb_sched *q = qdisc_priv(sch);
2078        struct htb_class *cl;
2079        unsigned int i;
2080
2081        if (arg->stop)
2082                return;
2083
2084        for (i = 0; i < q->clhash.hashsize; i++) {
2085                hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2086                        if (arg->count < arg->skip) {
2087                                arg->count++;
2088                                continue;
2089                        }
2090                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2091                                arg->stop = 1;
2092                                return;
2093                        }
2094                        arg->count++;
2095                }
2096        }
2097}
2098
2099static const struct Qdisc_class_ops htb_class_ops = {
2100        .select_queue   =       htb_select_queue,
2101        .graft          =       htb_graft,
2102        .leaf           =       htb_leaf,
2103        .qlen_notify    =       htb_qlen_notify,
2104        .find           =       htb_search,
2105        .change         =       htb_change_class,
2106        .delete         =       htb_delete,
2107        .walk           =       htb_walk,
2108        .tcf_block      =       htb_tcf_block,
2109        .bind_tcf       =       htb_bind_filter,
2110        .unbind_tcf     =       htb_unbind_filter,
2111        .dump           =       htb_dump_class,
2112        .dump_stats     =       htb_dump_class_stats,
2113};
2114
2115static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2116        .cl_ops         =       &htb_class_ops,
2117        .id             =       "htb",
2118        .priv_size      =       sizeof(struct htb_sched),
2119        .enqueue        =       htb_enqueue,
2120        .dequeue        =       htb_dequeue,
2121        .peek           =       qdisc_peek_dequeued,
2122        .init           =       htb_init,
2123        .attach         =       htb_attach,
2124        .reset          =       htb_reset,
2125        .destroy        =       htb_destroy,
2126        .dump           =       htb_dump,
2127        .owner          =       THIS_MODULE,
2128};
2129
2130static int __init htb_module_init(void)
2131{
2132        return register_qdisc(&htb_qdisc_ops);
2133}
2134static void __exit htb_module_exit(void)
2135{
2136        unregister_qdisc(&htb_qdisc_ops);
2137}
2138
2139module_init(htb_module_init)
2140module_exit(htb_module_exit)
2141MODULE_LICENSE("GPL");
2142