// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* HTB algorithm.
 * Author: devik@cdi.cz
 * ========================================================================
 * HTB is like TBF with multiple classes. It is also similar to CBQ because
 * it allows to assign priority to each class in hierarchy.
 * In fact it is another implementation of Floyd's formal sharing.
 *
 * Levels:
 * Each class is assigned level. Leaf has ALWAYS level 0 and root
 * classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
 * one less than their parent.
 */

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0;
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
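
/* A minimal usage sketch (illustrative, not part of this file): both knobs
 * above are ordinary module parameters, so on a hypothetical host they can
 * be set at load time or, mode 0640 permitting, flipped later via sysfs:
 *
 *	modprobe sch_htb htb_hysteresis=1 htb_rate_est=1
 *	echo 0 > /sys/module/sch_htb/parameters/htb_hysteresis
 */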

/* used internally to keep status of single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When class changes from state 1->2 and disconnects from
	 * parent's feed then we lost ptr value and start from the
	 * first child again. Here we store classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	u32			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
	struct tcf_block	*block;
	int			filter_cnt;

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct net_rate_estimator __rcu *rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_packed bstats;
	struct gnet_stats_basic_packed bstats_bias;
	struct tc_htb_xstats	xstats;	/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			int		deficit[TC_HTB_MAXDEPTH];
			struct Qdisc	*q;
		} leaf;
		struct htb_class_inner {
			struct htb_prio clprio[TC_HTB_NUMPRIO];
		} inner;
	};
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */

	unsigned int drops ____cacheline_aligned_in_smp;
	unsigned int		overlimits;
};

struct htb_level {
	struct rb_root	wait_pq;
	struct htb_prio hprio[TC_HTB_NUMPRIO];
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	int			defcls;		/* class where unclassified flows go to */
	int			rate2quantum;	/* quant = rate / rate2quantum */

	/* filters for qdisc itself */
	struct tcf_proto __rcu	*filter_list;
	struct tcf_block	*block;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int		warned;	/* only one warning */
	int			direct_qlen;
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
	struct qdisc_skb_head	direct_queue;
	u32			direct_pkts;
	u32			overlimits;

	struct qdisc_watchdog	watchdog;

	s64			now;	/* cached dequeue time */

	/* time of nearest event per level (row) */
	s64			near_ev_cache[TC_HTB_MAXDEPTH];

	int			row_mask[TC_HTB_MAXDEPTH];

	struct htb_level	hlevel[TC_HTB_MAXDEPTH];

	struct Qdisc		**direct_qdiscs;
	unsigned int		num_direct_qdiscs;

	bool			offload;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
	return (unsigned long)htb_find(handle, sch);
}

/**
 * htb_classify - classify a packet into class
 * @skb: the socket buffer
 * @sch: the active queue discipline
 * @qerr: pointer for returned status code
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. It becomes the
 * only possible result when all filters fail.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	 * note that nfmark can be used too by attaching filter fw with
	 * classid
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl) {
		if (cl->level == 0)
			return cl;
		/* Start with inner filter chain if a non-leaf class is selected */
		tcf = rcu_dereference_bh(cl->filter_list);
	} else {
		tcf = rcu_dereference_bh(q->filter_list);
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = rcu_dereference_bh(cl->filter_list);
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
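
/* A small configuration sketch (illustrative only, assuming a device named
 * eth0): after "tc qdisc add dev eth0 root handle 1: htb default 20" with a
 * leaf class 1:10 attached, a u32 filter such as
 *
 *	tc filter add dev eth0 parent 1: protocol ip u32 \
 *		match ip dport 22 0xffff classid 1:10
 *
 * makes htb_classify() return class 1:10 for matching packets. Unmatched
 * traffic falls back to the default class 1:20, and a packet whose
 * skb->priority equals the qdisc handle 1:0 bypasses shaping via HTB_DIRECT.
 */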

/**
 * htb_add_to_id_tree - adds class to the round robin list
 * @root: the root of the tree
 * @cl: the class to add
 * @prio: the given prio in class
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 * @q: the priority event queue
 * @cl: the class to add
 * @delay: delay in nanoseconds
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key nanoseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 * @n: the current node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 * @q: the priority event queue
 * @cl: the class to add
 * @mask: the given priorities in class in bitmap
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}

/**
 * htb_remove_class_from_row - removes class from its row
 * @q: the priority event queue
 * @cl: the class to remove
 * @mask: the given priorities in class in bitmap
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active classe's feed chain
 * @q: the priority event queue
 * @cl: the class to activate
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating on. cl->cmode must be new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use so that
				 * reset bit in mask as parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 * @q: the priority event queue
 * @cl: the class to deactivate
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing child which is pointed to
				 * from parent feed - forget the pointer but
				 * remember classid
				 */
				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->inner.clprio[prio].ptr = NULL;
			}

			htb_safe_rb_erase(cl->node + prio,
					  &p->inner.clprio[prio].feed);

			if (!p->inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}
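
/* Effect of the module knob (illustrative): with htb_hysteresis=1 a class
 * that is in HTB_CAN_SEND keeps that mode until cl->tokens sinks below
 * -cl->buffer instead of below 0, and a class outside HTB_CANT_SEND only
 * enters it once cl->ctokens sinks below -cl->cbuffer. The extra slack
 * suppresses rapid mode flapping (and the rb-tree updates it causes) at
 * the price of up to one buffer of shaping inaccuracy.
 */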

/**
 * htb_class_mode - computes and returns current class mode
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth to note that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
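
/* A worked example under assumed values: suppose hysteresis is off, so both
 * watermarks are 0. With cl->ctokens = -5000 and *diff = 2000, the ceil
 * bucket is still 3000 ns short, so the class is HTB_CANT_SEND and *diff is
 * set to 3000 (time until the ceil bucket refills back to zero). If instead
 * ctokens + *diff >= 0 but cl->tokens + *diff = -1000, the class may borrow
 * from its parent and *diff = 1000 says when it could send on its own again.
 */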

/**
 * htb_change_class_mode - changes classe's mode
 * @q: the priority event queue
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * This should be the only way how to change classe's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND) {
		cl->overlimits++;
		q->overlimits++;
	}

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Make sure that leaf is active. In the other words it can't
 * be called with non-active leaf.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	int ret;
	unsigned int len = qdisc_pkt_len(skb);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__qdisc_enqueue_tail(skb, &q->direct_queue);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch, to_free);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
					to_free)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			qdisc_qstats_drop(sch);
			cl->drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}
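
/* Usage note (illustrative, not from this file): an application can steer
 * its own traffic into the direct queue by setting the socket priority to
 * the qdisc's handle, e.g. for "handle 1:" (numeric value 0x10000):
 *
 *	int prio = 0x10000;	// TC_H_MAKE(1 << 16, 0), i.e. classid 1:0
 *	setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 *
 * (values above 6 need CAP_NET_ADMIN). Such packets bypass shaping entirely
 * and are bounded only by direct_qlen.
 */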

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 * @q: the priority event queue
 * @cl: the class to start iterate
 * @level: the minimum level to account
 * @skb: the socket buffer
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock that event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 * @q: the priority event queue
 * @level: which wait_pq in 'q->hlevel'
 * @start: start jiffies
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose have cl->pq_key <= q->now.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warn("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
 * is returned if ID is not found.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
		    rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 * @hprio: the current one
 * @prio: which prio in class
 *
 * Find leaf where current feed pointers points to.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;
	sp->pptr = &hprio->ptr;
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so that remember it */

		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			struct htb_prio *clp;

			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			clp = &cl->inner.clprio[prio];
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->leaf.q->dequeue(cl->leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->leaf.q);
		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
					 &q->hlevel[0].hprio[prio].ptr);
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->leaf.deficit[level] < 0) {
			cl->leaf.deficit[level] += cl->quantum;
			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_get_ns();
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	if (likely(next_event > q->now))
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		schedule_work(&q->work);
fin:
	return skb;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->inner, 0, sizeof(cl->inner));
			else {
				if (cl->leaf.q && !q->offload)
					qdisc_reset(cl->leaf.q);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__qdisc_reset_queue(&q->direct_queue);
	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	memset(q->hlevel, 0, sizeof(q->hlevel));
	memset(q->row_mask, 0, sizeof(q->row_mask));
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
	[TCA_HTB_RATE64] = { .type = NLA_U64 },
	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
};
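
/* For orientation (illustrative, assuming a device named eth0): these
 * attributes arrive from userspace commands such as
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *
 * where "default 20" and "r2q 10" travel in TCA_HTB_INIT (struct
 * tc_htb_glob), per-class parameters arrive later in TCA_HTB_PARMS, and
 * TCA_HTB_RATE64/TCA_HTB_CEIL64 carry rates that do not fit in 32 bits.
 */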

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	rcu_read_lock();
	__netif_schedule(qdisc_root(sch));
	rcu_read_unlock();
}

static void htb_set_lockdep_class_child(struct Qdisc *q)
{
	static struct lock_class_key child_key;

	lockdep_set_class(qdisc_lock(q), &child_key);
}

static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	unsigned int ntx;
	bool offload;
	int err;

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);

	if (offload) {
		if (sch->parent != TC_H_ROOT)
			return -EOPNOTSUPP;

		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
			return -EOPNOTSUPP;

		q->num_direct_qdiscs = dev->real_num_tx_queues;
		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
					   sizeof(*q->direct_qdiscs),
					   GFP_KERNEL);
		if (!q->direct_qdiscs)
			return -ENOMEM;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		goto err_free_direct_qdiscs;

	qdisc_skb_head_init(&q->direct_queue);

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	if (!offload)
		return 0;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *qdisc;

		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  TC_H_MAKE(sch->handle, 0), extack);
		if (!qdisc) {
			err = -ENOMEM;
			goto err_free_qdiscs;
		}

		htb_set_lockdep_class_child(qdisc);
		q->direct_qdiscs[ntx] = qdisc;
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	}

	sch->flags |= TCQ_F_MQROOT;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_CREATE,
		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
		.classid = TC_H_MIN(q->defcls),
		.extack = extack,
	};
	err = htb_offload(dev, &offload_opt);
	if (err)
		goto err_free_qdiscs;

	/* Defer this assignment, so that htb_destroy skips the
	 * offload-related part when it gets called after a failed init.
	 */
	q->offload = true;

	return 0;

err_free_qdiscs:
	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
	     ntx++)
		qdisc_put(q->direct_qdiscs[ntx]);

	qdisc_class_hash_destroy(&q->clhash);
	/* Prevent use-after-free and double-free when htb_destroy gets called.
	 */
	q->clhash.hash = NULL;
	q->clhash.hashsize = 0;

err_free_direct_qdiscs:
	kfree(q->direct_qdiscs);
	q->direct_qdiscs = NULL;
	return err;
}
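
/* Offload-mode sketch (illustrative, assuming a NIC whose driver handles
 * TC_SETUP_QDISC_HTB in its ndo_setup_tc callback):
 *
 *	tc qdisc replace dev eth0 root handle 1: htb offload
 *
 * sends TC_HTB_CREATE to the driver, attaches one pfifo per real tx queue
 * for unclassified (direct) traffic, and leaves rate enforcement to the
 * hardware rather than to the software dequeue path above.
 */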

static void htb_attach_offload(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct htb_sched *q = qdisc_priv(sch);
	unsigned int ntx;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];

		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
		qdisc_put(old);
		qdisc_hash_add(qdisc, false);
	}
	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);

		qdisc_put(old);
	}

	kfree(q->direct_qdiscs);
	q->direct_qdiscs = NULL;
}

static void htb_attach_software(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx;

	/* Resemble qdisc_graft behavior. */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);

		qdisc_refcount_inc(sch);

		qdisc_put(old);
	}
}

static void htb_attach(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);

	if (q->offload)
		htb_attach_offload(sch);
	else
		htb_attach_software(sch);
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	if (q->offload)
		sch->flags |= TCQ_F_OFFLOADED;
	else
		sch->flags &= ~TCQ_F_OFFLOADED;

	sch->qstats.overlimits = q->overlimits;
	/* Its safe to not acquire qdisc lock. As we hold RTNL,
	 * no change can happen on the qdisc parameters.
	 */

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
		goto nla_put_failure;
	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	/* Its safe to not acquire qdisc lock. As we hold RTNL,
	 * no change can happen on the class parameters.
	 */
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->leaf.q)
		tcm->tcm_info = cl->leaf.q->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	psched_ratecfg_getrate(&opt.rate, &cl->rate);
	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
		goto nla_put_failure;
	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;
	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static void htb_offload_aggregate_stats(struct htb_sched *q,
					struct htb_class *cl)
{
	struct htb_class *c;
	unsigned int i;

	memset(&cl->bstats, 0, sizeof(cl->bstats));

	/* In offload mode only leaf classes have live counters; sum the
	 * bias and leaf qdisc counters of every descendant leaf of cl.
	 */
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
			struct htb_class *p = c;

			while (p && p->level < cl->level)
				p = p->parent;

			if (p != cl)
				continue;

			cl->bstats.bytes += c->bstats_bias.bytes;
			cl->bstats.packets += c->bstats_bias.packets;
			if (c->level == 0) {
				cl->bstats.bytes += c->leaf.q->bstats.bytes;
				cl->bstats.packets += c->leaf.q->bstats.packets;
			}
		}
	}
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct gnet_stats_queue qs = {
		.drops = cl->drops,
		.overlimits = cl->overlimits,
	};
	__u32 qlen = 0;

	if (!cl->level && cl->leaf.q)
		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);

	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
				    INT_MIN, INT_MAX);
	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
				     INT_MIN, INT_MAX);

	if (q->offload) {
		if (!cl->level) {
			if (cl->leaf.q)
				cl->bstats = cl->leaf.q->bstats;
			else
				memset(&cl->bstats, 0, sizeof(cl->bstats));
			cl->bstats.bytes += cl->bstats_bias.bytes;
			cl->bstats.packets += cl->bstats_bias.packets;
		} else {
			htb_offload_aggregate_stats(q, cl);
		}
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
				  d, NULL, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}
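
/* These counters are what userspace renders (output abridged and
 * illustrative):
 *
 *	tc -s class show dev eth0
 *	  class htb 1:10 ... Sent 1234 bytes 10 pkt (dropped 0, overlimits 2)
 *	  lended: 8 borrowed: 2 giants: 0
 *	  tokens: 1875000 ctokens: 937500
 *
 * "lended"/"borrowed" come from tc_htb_xstats via gnet_stats_copy_app();
 * tokens/ctokens are the clamped bucket levels computed above.
 */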

static struct netdev_queue *
htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	int err;

	if (!q->offload)
		return sch->dev_queue;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_LEAF_QUERY_QUEUE,
		.classid = TC_H_MIN(tcm->tcm_parent),
	};
	err = htb_offload(dev, &offload_opt);
	if (err || offload_opt.qid >= dev->num_tx_queues)
		return NULL;
	return netdev_get_tx_queue(dev, offload_opt.qid);
}

static struct Qdisc *
htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
{
	struct net_device *dev = dev_queue->dev;
	struct Qdisc *old_q;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);
	old_q = dev_graft_qdisc(dev_queue, new_q);
	if (new_q)
		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return old_q;
}

static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
{
	struct netdev_queue *queue_old, *queue_new;
	struct net_device *dev = qdisc_dev(sch);
	struct Qdisc *qdisc;

	queue_old = netdev_get_tx_queue(dev, qid_old);
	queue_new = netdev_get_tx_queue(dev, qid_new);

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);
	qdisc = dev_graft_qdisc(queue_old, NULL);
	qdisc->dev_queue = queue_new;
	qdisc = dev_graft_qdisc(queue_new, qdisc);
	if (dev->flags & IFF_UP)
		dev_activate(dev);

	WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct netdev_queue *dev_queue = sch->dev_queue;
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc *old_q;

	if (cl->level)
		return -EINVAL;

	if (q->offload) {
		dev_queue = new->dev_queue;
		WARN_ON(dev_queue != cl->leaf.q->dev_queue);
	}

	if (!new) {
		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					cl->common.classid, extack);
		if (!new)
			return -ENOBUFS;
	}

	if (q->offload) {
		htb_set_lockdep_class_child(new);
		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
		qdisc_refcount_inc(new);
		old_q = htb_graft_helper(dev_queue, new);
	}

	*old = qdisc_replace(sch, new, &cl->leaf.q);

	if (q->offload) {
		WARN_ON(old_q != *old);
		qdisc_put(old_q);
	}

	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	htb_deactivate(qdisc_priv(sch), cl);
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node,
				  &q->hlevel[parent->level].wait_pq);

	parent->level = 0;
	memset(&parent->inner, 0, sizeof(parent->inner));
	parent->leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = ktime_get_ns();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_parent_to_leaf_offload(struct Qdisc *sch,
				       struct netdev_queue *dev_queue,
				       struct Qdisc *new_q)
{
	struct Qdisc *old_q;

	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
	if (new_q)
		qdisc_refcount_inc(new_q);
	old_q = htb_graft_helper(dev_queue, new_q);
	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
}

static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
				     bool last_child, bool destroying,
				     struct netlink_ext_ack *extack)
{
	struct tc_htb_qopt_offload offload_opt;
	struct Qdisc *q = cl->leaf.q;
	struct Qdisc *old = NULL;
	int err;

	if (cl->level)
		return -EINVAL;

	WARN_ON(!q);
	if (!destroying) {
		/* Temporarily detach the leaf qdisc from its tx queue so the
		 * driver may reassign the queue; it is grafted back on error.
		 */
		old = htb_graft_helper(q->dev_queue, NULL);
		WARN_ON(!old);
		WARN_ON(old != q);
	}

	if (cl->parent) {
		cl->parent->bstats_bias.bytes += q->bstats.bytes;
		cl->parent->bstats_bias.packets += q->bstats.packets;
	}

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = !last_child ? TC_HTB_LEAF_DEL :
			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
			   TC_HTB_LEAF_DEL_LAST,
		.classid = cl->common.classid,
		.extack = extack,
	};
	err = htb_offload(qdisc_dev(sch), &offload_opt);

	if (!err || destroying)
		qdisc_put(old);
	else
		htb_graft_helper(q->dev_queue, old);

	if (last_child)
		return err;

	if (!err && offload_opt.moved_qid != 0) {
		if (destroying)
			q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
							   offload_opt.qid);
		else
			htb_offload_move_qdisc(sch, offload_opt.moved_qid,
					       offload_opt.qid);
	}

	return err;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->leaf.q);
		qdisc_put(cl->leaf.q);
	}
	gen_kill_estimator(&cl->rate_est);
	tcf_block_put(cl->block);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *next;
	bool nonempty, changed;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filter need its target class alive to be able to call
	 * unbind_filter on it (without Oops).
	 */
	tcf_block_put(q->block);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			tcf_block_put(cl->block);
			cl->block = NULL;
		}
	}

	do {
		nonempty = false;
		changed = false;
		for (i = 0; i < q->clhash.hashsize; i++) {
			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
						  common.hnode) {
				bool last_child;

				if (!q->offload) {
					htb_destroy_class(sch, cl);
					continue;
				}

				nonempty = true;

				if (cl->level)
					continue;

				changed = true;

				last_child = htb_parent_last_child(cl);
				htb_destroy_class_offload(sch, cl, last_child,
							  true, NULL);
				qdisc_class_hash_remove(&q->clhash,
							&cl->common);
				if (cl->parent)
					cl->parent->children--;
				if (last_child)
					htb_parent_to_leaf(sch, cl, NULL);
				htb_destroy_class(sch, cl);
			}
		}
	} while (changed);
	WARN_ON(nonempty);

	qdisc_class_hash_destroy(&q->clhash);
	__qdisc_reset_queue(&q->direct_queue);

	if (!q->offload)
		return;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_DESTROY,
	};
	htb_offload(dev, &offload_opt);

	if (!q->direct_qdiscs)
		return;
	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
		qdisc_put(q->direct_qdiscs[i]);
	kfree(q->direct_qdiscs);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg,
		      struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct Qdisc *new_q = NULL;
	int last_child = 0;
	int err;

	/* TODO: why don't allow to delete subtree ? references ? does
	 * tc subsys guarantee us that in htb_destroy it holds no class
	 * refs so that we can remove children safely there ?
	 */
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl))
		last_child = 1;

	if (q->offload) {
		err = htb_destroy_class_offload(sch, cl, last_child, false,
						extack);
		if (err)
			return err;
	}

	if (last_child) {
		struct netdev_queue *dev_queue;

		dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid,
					  NULL);
		if (q->offload) {
			if (new_q)
				htb_set_lockdep_class_child(new_q);
			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
		}
	}

	sch_tree_lock(sch);

	if (!cl->level)
		qdisc_purge_queue(cl->leaf.q);

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node,
				  &q->hlevel[cl->level].wait_pq);

	if (last_child)
		htb_parent_to_leaf(sch, cl, new_q);

	sch_tree_unlock(sch);

	htb_destroy_class(sch, cl);
	return 0;
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg, struct netlink_ext_ack *extack)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct tc_htb_qopt_offload offload_opt;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct Qdisc *parent_qdisc = NULL;
	struct netdev_queue *dev_queue;
	struct tc_htb_opt *hopt;
	u64 rate64, ceil64;
	int warn = 0;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	/* Keeping backward compatible with rate_table based iproute2 tc */
	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
					      NULL));

	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
					      NULL));

	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;

	if (!cl) {		/* new class */
		struct net_device *dev = qdisc_dev(sch);
		struct Qdisc *new_q, *old_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
		if (err) {
			kfree(cl);
			goto failure;
		}
		if (htb_rate_est || tca[TCA_RATE]) {
			err = gen_new_estimator(&cl->bstats, NULL,
						&cl->rate_est,
						NULL,
						qdisc_root_sleeping_running(sch),
						tca[TCA_RATE] ? : &est.nla);
			if (err)
				goto err_block_put;
		}

		cl->children = 0;
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		cl->common.classid = classid;

		/* Make sure nothing interrupts us in between of two
		 * ndo_setup_tc calls.
		 */
		ASSERT_RTNL();

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so that can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		if (!q->offload) {
			dev_queue = sch->dev_queue;
		} else if (!(parent && !parent->level)) {
			/* Assign a dev_queue to this classid. */
			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_LEAF_ALLOC_QUEUE,
				.classid = cl->common.classid,
				.parent_classid = parent ?
					TC_H_MIN(parent->common.classid) :
					TC_HTB_CLASSID_ROOT,
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err) {
				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
				       err);
				goto err_kill_estimator;
			}
			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
		} else { /* First child. */
			dev_queue = parent->leaf.q->dev_queue;
			old_q = htb_graft_helper(dev_queue, NULL);
			WARN_ON(old_q != parent->leaf.q);
			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_LEAF_TO_INNER,
				.classid = cl->common.classid,
				.parent_classid =
					TC_H_MIN(parent->common.classid),
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err) {
				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
				       err);
				htb_graft_helper(dev_queue, old_q);
				goto err_kill_estimator;
			}
			parent->bstats_bias.bytes += old_q->bstats.bytes;
			parent->bstats_bias.packets += old_q->bstats.packets;
			qdisc_put(old_q);
		}
		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  classid, NULL);
		if (q->offload) {
			if (new_q) {
				htb_set_lockdep_class_child(new_q);
				/* One ref for cl->leaf.q, the other for
				 * dev_queue->qdisc.
				 */
				qdisc_refcount_inc(new_q);
			}
			old_q = htb_graft_helper(dev_queue, new_q);
			/* No qdisc_put needed. */
			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
		}
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			/* turn parent into inner node */
			qdisc_purge_queue(parent->leaf.q);
			parent_qdisc = parent->leaf.q;
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->inner, 0, sizeof(parent->inner));
		}

		/* leaf (we) needs elementary qdisc */
		cl->leaf.q = new_q ? new_q : &noop_qdisc;

		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
		cl->t_c = ktime_get_ns();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
		if (cl->leaf.q != &noop_qdisc)
			qdisc_hash_add(cl->leaf.q, true);
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, NULL,
						    &cl->rate_est,
						    NULL,
						    qdisc_root_sleeping_running(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}

		if (q->offload) {
			struct net_device *dev = qdisc_dev(sch);

			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_NODE_MODIFY,
				.classid = cl->common.classid,
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err)
				/* Estimator was replaced, and rollback may
				 * fail as well, so we just leave the
				 * estimator in the new state and report
				 * the error.
				 */
				return err;
		}

		sch_tree_lock(sch);
	}

	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);

	/* it used to be a nasty bug here, we have to check that node
	 * is really leaf before changing cl->leaf !
	 */
	if (!cl->level) {
		u64 quantum = cl->rate.rate_bytes_ps;

		do_div(quantum, q->rate2quantum);
		cl->quantum = min_t(u64, quantum, INT_MAX);

		if (!hopt->quantum && cl->quantum < 1000) {
			warn = -1;
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			warn = 1;
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}
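
	/* Worked example (illustrative): with tc's default r2q of 10, a leaf
	 * rate of 100Mbit is 12500000 bytes/s, giving quantum = 1250000,
	 * which is clamped to 200000 with a "quantum of class ... is big"
	 * warning below; passing an explicit quantum (or tuning r2q at
	 * qdisc creation) avoids the clamp.
	 */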

	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

	sch_tree_unlock(sch);
	qdisc_put(parent_qdisc);

	if (warn)
		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
			cl->common.classid, (warn == -1 ? "small" : "big"));

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

err_kill_estimator:
	gen_kill_estimator(&cl->rate_est);
err_block_put:
	tcf_block_put(cl->block);
	kfree(cl);
failure:
	return err;
}
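
/* End-to-end sketch of how this path is exercised (illustrative, assuming
 * eth0 and the 1: qdisc from the earlier example):
 *
 *	tc class add dev eth0 parent 1: classid 1:10 htb \
 *		rate 1mbit ceil 2mbit burst 15k prio 1
 *
 * creates a leaf with its own pfifo; adding a child under 1:10 later
 * converts 1:10 into an inner node and replaces its leaf qdisc.
 */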

static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
				       struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	return cl ? cl->block : q->block;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least tc_index filter uses this just to get class
	 * for other reasons so that we have to allow for it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.select_queue	=	htb_select_queue,
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.find		=	htb_search,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_block	=	htb_tcf_block,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	htb_init,
	.attach		=	htb_attach,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");