1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18#include <linux/config.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/sched.h>
22#include <linux/string.h>
23#include <linux/mm.h>
24#include <linux/socket.h>
25#include <linux/sockios.h>
26#include <linux/in.h>
27#include <linux/errno.h>
28#include <linux/interrupt.h>
29#include <linux/netdevice.h>
30#include <linux/skbuff.h>
31#include <linux/rtnetlink.h>
32#include <linux/init.h>
33#include <linux/proc_fs.h>
34#include <linux/kmod.h>
35#include <linux/list.h>
36
37#include <net/sock.h>
38#include <net/pkt_sched.h>
39
40#include <asm/processor.h>
41#include <asm/uaccess.h>
42#include <asm/system.h>
43#include <asm/bitops.h>
44
/*
 * Forward declarations.  Both notification helpers are defined further down
 * in this file but are called by the netlink request handlers that come
 * before their definitions.
 */
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
 * Protects the qdisc_base list: register_qdisc()/unregister_qdisc() take it
 * for writing, qdisc_lookup_ops() takes it for reading.
 */
static rwlock_t qdisc_mod_lock = RW_LOCK_UNLOCKED;
133
134
135
136
137
138
139
140
141
/* Head of the singly linked list (chained through ->next) of registered Qdisc_ops. */
static struct Qdisc_ops *qdisc_base = NULL;
143
144
145
146int register_qdisc(struct Qdisc_ops *qops)
147{
148 struct Qdisc_ops *q, **qp;
149
150 write_lock(&qdisc_mod_lock);
151 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) {
152 if (strcmp(qops->id, q->id) == 0) {
153 write_unlock(&qdisc_mod_lock);
154 return -EEXIST;
155 }
156 }
157
158 if (qops->enqueue == NULL)
159 qops->enqueue = noop_qdisc_ops.enqueue;
160 if (qops->requeue == NULL)
161 qops->requeue = noop_qdisc_ops.requeue;
162 if (qops->dequeue == NULL)
163 qops->dequeue = noop_qdisc_ops.dequeue;
164
165 qops->next = NULL;
166 *qp = qops;
167 write_unlock(&qdisc_mod_lock);
168 return 0;
169}
170
171int unregister_qdisc(struct Qdisc_ops *qops)
172{
173 struct Qdisc_ops *q, **qp;
174 int err = -ENOENT;
175
176 write_lock(&qdisc_mod_lock);
177 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
178 if (q == qops)
179 break;
180 if (q) {
181 *qp = q->next;
182 q->next = NULL;
183 err = 0;
184 }
185 write_unlock(&qdisc_mod_lock);
186 return err;
187}
188
189
190
191
192
193struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
194{
195 struct Qdisc *q;
196
197 list_for_each_entry(q, &dev->qdisc_list, list) {
198 if (q->handle == handle)
199 return q;
200 }
201 return NULL;
202}
203
204struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
205{
206 unsigned long cl;
207 struct Qdisc *leaf;
208 struct Qdisc_class_ops *cops = p->ops->cl_ops;
209
210 if (cops == NULL)
211 return NULL;
212 cl = cops->get(p, classid);
213
214 if (cl == 0)
215 return NULL;
216 leaf = cops->leaf(p, cl);
217 cops->put(p, cl);
218 return leaf;
219}
220
221
222
223struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
224{
225 struct Qdisc_ops *q = NULL;
226
227 if (kind) {
228 read_lock(&qdisc_mod_lock);
229 for (q = qdisc_base; q; q = q->next) {
230 if (rtattr_strcmp(kind, q->id) == 0)
231 break;
232 }
233 read_unlock(&qdisc_mod_lock);
234 }
235 return q;
236}
237
/*
 * Head of the list of reference-counted rate tables handed out by
 * qdisc_get_rtab() and released by qdisc_put_rtab().
 */
static struct qdisc_rate_table *qdisc_rtab_list;
239
240struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
241{
242 struct qdisc_rate_table *rtab;
243
244 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
245 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
246 rtab->refcnt++;
247 return rtab;
248 }
249 }
250
251 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
252 return NULL;
253
254 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
255 if (rtab) {
256 rtab->rate = *r;
257 rtab->refcnt = 1;
258 memcpy(rtab->data, RTA_DATA(tab), 1024);
259 rtab->next = qdisc_rtab_list;
260 qdisc_rtab_list = rtab;
261 }
262 return rtab;
263}
264
265void qdisc_put_rtab(struct qdisc_rate_table *tab)
266{
267 struct qdisc_rate_table *rtab, **rtabp;
268
269 if (!tab || --tab->refcnt)
270 return;
271
272 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
273 if (rtab == tab) {
274 *rtabp = rtab->next;
275 kfree(rtab);
276 return;
277 }
278 }
279}
280
281
282
283
284u32 qdisc_alloc_handle(struct net_device *dev)
285{
286 int i = 0x10000;
287 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
288
289 do {
290 autohandle += TC_H_MAKE(0x10000U, 0);
291 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
292 autohandle = TC_H_MAKE(0x80000000U, 0);
293 } while (qdisc_lookup(dev, autohandle) && --i > 0);
294
295 return i>0 ? autohandle : 0;
296}
297
298
299
300static struct Qdisc *
301dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
302{
303 struct Qdisc *oqdisc;
304
305 if (dev->flags & IFF_UP)
306 dev_deactivate(dev);
307
308 write_lock(&qdisc_tree_lock);
309 spin_lock_bh(&dev->queue_lock);
310 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
311 oqdisc = dev->qdisc_ingress;
312
313 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
314
315 qdisc_reset(oqdisc);
316 dev->qdisc_ingress = NULL;
317 } else {
318 dev->qdisc_ingress = qdisc;
319 }
320
321 } else {
322
323 oqdisc = dev->qdisc_sleeping;
324
325
326 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
327 qdisc_reset(oqdisc);
328
329
330 if (qdisc == NULL)
331 qdisc = &noop_qdisc;
332 dev->qdisc_sleeping = qdisc;
333 dev->qdisc = &noop_qdisc;
334 }
335
336 spin_unlock_bh(&dev->queue_lock);
337 write_unlock(&qdisc_tree_lock);
338
339 if (dev->flags & IFF_UP)
340 dev_activate(dev);
341
342 return oqdisc;
343}
344
345
346
347
348
349
350
351
352int qdisc_graft(struct net_device *dev, struct Qdisc *parent, u32 classid,
353 struct Qdisc *new, struct Qdisc **old)
354{
355 int err = 0;
356 struct Qdisc *q = *old;
357
358
359 if (parent == NULL) {
360 if (q && q->flags&TCQ_F_INGRESS) {
361 *old = dev_graft_qdisc(dev, q);
362 } else {
363 *old = dev_graft_qdisc(dev, new);
364 }
365 } else {
366 struct Qdisc_class_ops *cops = parent->ops->cl_ops;
367
368 err = -EINVAL;
369
370 if (cops) {
371 unsigned long cl = cops->get(parent, classid);
372 if (cl) {
373 err = cops->graft(parent, cl, new, old);
374 if (new)
375 new->parent = classid;
376 cops->put(parent, cl);
377 }
378 }
379 }
380 return err;
381}
382
383
384
385
386
387
388
389static struct Qdisc *
390qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
391{
392 int err;
393 struct rtattr *kind = tca[TCA_KIND-1];
394 struct Qdisc *sch = NULL;
395 struct Qdisc_ops *ops;
396 int size;
397
398 ops = qdisc_lookup_ops(kind);
399#ifdef CONFIG_KMOD
400 if (ops==NULL && tca[TCA_KIND-1] != NULL) {
401 char module_name[4 + IFNAMSIZ + 1];
402
403 if (RTA_PAYLOAD(kind) <= IFNAMSIZ) {
404 sprintf(module_name, "sch_%s", (char*)RTA_DATA(kind));
405 request_module (module_name);
406 ops = qdisc_lookup_ops(kind);
407 }
408 }
409#endif
410
411 err = -EINVAL;
412 if (ops == NULL)
413 goto err_out;
414
415 size = sizeof(*sch) + ops->priv_size;
416
417 sch = kmalloc(size, GFP_KERNEL);
418 err = -ENOBUFS;
419 if (!sch)
420 goto err_out;
421
422
423
424 err = -EINVAL;
425 if (ops != qdisc_lookup_ops(kind))
426 goto err_out;
427
428 memset(sch, 0, size);
429
430 INIT_LIST_HEAD(&sch->list);
431 skb_queue_head_init(&sch->q);
432
433 if (handle == TC_H_INGRESS)
434 sch->flags |= TCQ_F_INGRESS;
435
436 sch->ops = ops;
437 sch->enqueue = ops->enqueue;
438 sch->dequeue = ops->dequeue;
439 sch->dev = dev;
440 atomic_set(&sch->refcnt, 1);
441 sch->stats.lock = &dev->queue_lock;
442 if (handle == 0) {
443 handle = qdisc_alloc_handle(dev);
444 err = -ENOMEM;
445 if (handle == 0)
446 goto err_out;
447 }
448
449 if (handle == TC_H_INGRESS)
450 sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
451 else
452 sch->handle = handle;
453
454 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
455 write_lock(&qdisc_tree_lock);
456 list_add_tail(&sch->list, &dev->qdisc_list);
457 write_unlock(&qdisc_tree_lock);
458#ifdef CONFIG_NET_ESTIMATOR
459 if (tca[TCA_RATE-1])
460 qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
461#endif
462 return sch;
463 }
464
465err_out:
466 *errp = err;
467 if (sch)
468 kfree(sch);
469 return NULL;
470}
471
472static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
473{
474 if (tca[TCA_OPTIONS-1]) {
475 int err;
476
477 if (sch->ops->change == NULL)
478 return -EINVAL;
479 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
480 if (err)
481 return err;
482 }
483#ifdef CONFIG_NET_ESTIMATOR
484 if (tca[TCA_RATE-1]) {
485 qdisc_kill_estimator(&sch->stats);
486 qdisc_new_estimator(&sch->stats, tca[TCA_RATE-1]);
487 }
488#endif
489 return 0;
490}
491
/*
 * Walker context used by check_loop()/check_loop_fn().
 *
 * The walker must stay the first member: check_loop_fn() receives a pointer
 * to it and casts that pointer back to struct check_loop_arg.
 */
struct check_loop_arg
{
	struct qdisc_walker 	w;	/* generic class walker state */
	struct Qdisc		*p;	/* qdisc that must not appear as a leaf */
	int			depth;	/* current nesting depth of the search */
};
498
/* Forward declaration: check_loop() and check_loop_fn() call each other. */
static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
500
501static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
502{
503 struct check_loop_arg arg;
504
505 if (q->ops->cl_ops == NULL)
506 return 0;
507
508 arg.w.stop = arg.w.skip = arg.w.count = 0;
509 arg.w.fn = check_loop_fn;
510 arg.depth = depth;
511 arg.p = p;
512 q->ops->cl_ops->walk(q, &arg.w);
513 return arg.w.stop ? -ELOOP : 0;
514}
515
516static int
517check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
518{
519 struct Qdisc *leaf;
520 struct Qdisc_class_ops *cops = q->ops->cl_ops;
521 struct check_loop_arg *arg = (struct check_loop_arg *)w;
522
523 leaf = cops->leaf(q, cl);
524 if (leaf) {
525 if (leaf == arg->p || arg->depth > 7)
526 return -ELOOP;
527 return check_loop(leaf, arg->p, arg->depth + 1);
528 }
529 return 0;
530}
531
532
533
534
535
536static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
537{
538 struct tcmsg *tcm = NLMSG_DATA(n);
539 struct rtattr **tca = arg;
540 struct net_device *dev;
541 u32 clid = tcm->tcm_parent;
542 struct Qdisc *q = NULL;
543 struct Qdisc *p = NULL;
544 int err;
545
546 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
547 return -ENODEV;
548
549 if (clid) {
550 if (clid != TC_H_ROOT) {
551 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
552 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
553 return -ENOENT;
554 q = qdisc_leaf(p, clid);
555 } else {
556 q = dev->qdisc_ingress;
557 }
558 } else {
559 q = dev->qdisc_sleeping;
560 }
561 if (!q)
562 return -ENOENT;
563
564 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
565 return -EINVAL;
566 } else {
567 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
568 return -ENOENT;
569 }
570
571 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
572 return -EINVAL;
573
574 if (n->nlmsg_type == RTM_DELQDISC) {
575 if (!clid)
576 return -EINVAL;
577 if (q->handle == 0)
578 return -ENOENT;
579 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
580 return err;
581 if (q) {
582 qdisc_notify(skb, n, clid, q, NULL);
583 spin_lock_bh(&dev->queue_lock);
584 qdisc_destroy(q);
585 spin_unlock_bh(&dev->queue_lock);
586 }
587 } else {
588 qdisc_notify(skb, n, clid, NULL, q);
589 }
590 return 0;
591}
592
593
594
595
596
/*
 * Netlink handler for RTM_NEWQDISC: create a qdisc, graft an existing one
 * at a new place, or change the parameters of an existing one.
 *
 * The outcome is driven by tcm_parent (clid), tcm_handle and the
 * NLM_F_CREATE / NLM_F_REPLACE / NLM_F_EXCL flags:
 *   - "change" path: the located qdisc is updated through qdisc_change();
 *   - create_n_graft: a new qdisc is built with qdisc_create() and grafted;
 *   - graft: an already existing qdisc (found by handle) is grafted at clid
 *     after taking an extra reference on it.
 *
 * Returns 0 on success or a negative errno (-ENODEV, -ENOENT, -EEXIST,
 * -EINVAL, -ELOOP, or an error from qdisc_change/qdisc_create/qdisc_graft).
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	if (clid) {
		/* Find what is currently attached at the requested parent. */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else {
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}

		/* A qdisc with handle 0 is treated as "nothing attached". */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/*
				 * An explicit handle was given and it is not
				 * the qdisc sitting at clid.
				 */
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				/* Qdisc handles must have a zero minor part. */
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
					return -EINVAL;
				/* Refuse to graft a qdisc below itself. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				/* Extra reference for the additional attachment point. */
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/*
				 * No handle given but something is attached
				 * at clid.  Only CREATE together with REPLACE,
				 * plus either EXCL or a kind that differs from
				 * the attached qdisc, replaces it with a newly
				 * created qdisc; every other combination falls
				 * through to the "change" path below.
				 */
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND-1] &&
				      rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		/* No parent: the qdisc must be addressed by handle. */
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change the parameters of the located qdisc. */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	/* For ingress the parent id itself is passed as the handle. */
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
	else
		q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
	if (q == NULL)
		return err;

graft:
	if (1) {
		struct Qdisc *old_q = NULL;
		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			/* Graft failed: release the qdisc we meant to attach. */
			if (q) {
				spin_lock_bh(&dev->queue_lock);
				qdisc_destroy(q);
				spin_unlock_bh(&dev->queue_lock);
			}
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		/* Release the qdisc that was displaced by the graft. */
		if (old_q) {
			spin_lock_bh(&dev->queue_lock);
			qdisc_destroy(old_q);
			spin_unlock_bh(&dev->queue_lock);
		}
	}
	return 0;
}
724
725int qdisc_copy_stats(struct sk_buff *skb, struct tc_stats *st)
726{
727 spin_lock_bh(st->lock);
728 RTA_PUT(skb, TCA_STATS, (char*)&st->lock - (char*)st, st);
729 spin_unlock_bh(st->lock);
730 return 0;
731
732rtattr_failure:
733 spin_unlock_bh(st->lock);
734 return -1;
735}
736
737
738static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
739 u32 pid, u32 seq, unsigned flags, int event)
740{
741 struct tcmsg *tcm;
742 struct nlmsghdr *nlh;
743 unsigned char *b = skb->tail;
744
745 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
746 nlh->nlmsg_flags = flags;
747 tcm = NLMSG_DATA(nlh);
748 tcm->tcm_family = AF_UNSPEC;
749 tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
750 tcm->tcm_parent = clid;
751 tcm->tcm_handle = q->handle;
752 tcm->tcm_info = atomic_read(&q->refcnt);
753 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
754 if (q->ops->dump && q->ops->dump(q, skb) < 0)
755 goto rtattr_failure;
756 q->stats.qlen = q->q.qlen;
757 if (qdisc_copy_stats(skb, &q->stats))
758 goto rtattr_failure;
759 nlh->nlmsg_len = skb->tail - b;
760 return skb->len;
761
762nlmsg_failure:
763rtattr_failure:
764 skb_trim(skb, b - skb->data);
765 return -1;
766}
767
768static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
769 u32 clid, struct Qdisc *old, struct Qdisc *new)
770{
771 struct sk_buff *skb;
772 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
773
774 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
775 if (!skb)
776 return -ENOBUFS;
777
778 if (old && old->handle) {
779 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
780 goto err_out;
781 }
782 if (new) {
783 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
784 goto err_out;
785 }
786
787 if (skb->len)
788 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
789
790err_out:
791 kfree_skb(skb);
792 return -EINVAL;
793}
794
795static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
796{
797 int idx, q_idx;
798 int s_idx, s_q_idx;
799 struct net_device *dev;
800 struct Qdisc *q;
801
802 s_idx = cb->args[0];
803 s_q_idx = q_idx = cb->args[1];
804 read_lock(&dev_base_lock);
805 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
806 if (idx < s_idx)
807 continue;
808 if (idx > s_idx)
809 s_q_idx = 0;
810 read_lock(&qdisc_tree_lock);
811 q_idx = 0;
812 list_for_each_entry(q, &dev->qdisc_list, list) {
813 if (q_idx < s_q_idx) {
814 q_idx++;
815 continue;
816 }
817 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
818 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
819 read_unlock(&qdisc_tree_lock);
820 goto done;
821 }
822 q_idx++;
823 }
824 read_unlock(&qdisc_tree_lock);
825 }
826
827done:
828 read_unlock(&dev_base_lock);
829
830 cb->args[0] = idx;
831 cb->args[1] = q_idx;
832
833 return skb->len;
834}
835
836
837
838
839
840
841
842
843
/*
 * Netlink handler for RTM_NEWTCLASS, RTM_DELTCLASS and RTM_GETTCLASS.
 *
 * Resolves the owning qdisc from the major parts of tcm_parent and
 * tcm_handle (falling back to the device's sleeping qdisc when neither
 * names one), looks the class up through the qdisc's class operations and
 * then creates/changes, deletes or reports it.
 *
 * Note: the local "pid" is the parent class id (tcm_parent), not a process
 * or netlink port id.
 *
 * Returns 0 on success or a negative errno: -ENODEV (no device), -ENOENT
 * (no such qdisc or class), -EINVAL (conflicting majors, classless qdisc,
 * unknown message type), -EEXIST (class exists and NLM_F_EXCL given), or
 * the error from the class operation / notification.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct rtattr **tca = arg;
	struct net_device *dev;
	struct Qdisc *q = NULL;
	struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	/*
	 * Step 1: determine the qdisc handle (qid).  It may come from the
	 * major part of the class handle, from the major part of the parent,
	 * or default to the device's sleeping qdisc.
	 */
	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* Both name a qdisc: they must agree. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/*
		 * A non-zero parent is rebuilt from qid and pid with
		 * TC_H_MAKE; a zero parent is left as zero.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* Step 2: locate the qdisc. */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* It must support classes. */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Step 3: build the class id handed to the class operations. */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class does not exist: only NEWTCLASS with CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create a new class or change the existing one. */
	new_cl = cl;
	err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Release the class obtained from get() above, if any. */
	if (cl)
		cops->put(q, cl);

	return err;
}
955
956
957static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
958 unsigned long cl,
959 u32 pid, u32 seq, unsigned flags, int event)
960{
961 struct tcmsg *tcm;
962 struct nlmsghdr *nlh;
963 unsigned char *b = skb->tail;
964
965 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*tcm));
966 nlh->nlmsg_flags = flags;
967 tcm = NLMSG_DATA(nlh);
968 tcm->tcm_family = AF_UNSPEC;
969 tcm->tcm_ifindex = q->dev ? q->dev->ifindex : 0;
970 tcm->tcm_parent = q->handle;
971 tcm->tcm_handle = q->handle;
972 tcm->tcm_info = 0;
973 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
974 if (q->ops->cl_ops->dump && q->ops->cl_ops->dump(q, cl, skb, tcm) < 0)
975 goto rtattr_failure;
976 nlh->nlmsg_len = skb->tail - b;
977 return skb->len;
978
979nlmsg_failure:
980rtattr_failure:
981 skb_trim(skb, b - skb->data);
982 return -1;
983}
984
985static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
986 struct Qdisc *q, unsigned long cl, int event)
987{
988 struct sk_buff *skb;
989 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
990
991 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
992 if (!skb)
993 return -ENOBUFS;
994
995 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
996 kfree_skb(skb);
997 return -EINVAL;
998 }
999
1000 return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1001}
1002
/*
 * Walker context used by tc_dump_tclass()/qdisc_class_dump().
 *
 * The walker must stay the first member: qdisc_class_dump() receives a
 * pointer to it and casts that pointer back to struct qdisc_dump_args.
 */
struct qdisc_dump_args
{
	struct qdisc_walker w;		/* generic class walker state */
	struct sk_buff *skb;		/* skb the dump messages are appended to */
	struct netlink_callback *cb;	/* dump callback state of the request */
};
1009
1010static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1011{
1012 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1013
1014 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1015 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1016}
1017
1018static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1019{
1020 int t;
1021 int s_t;
1022 struct net_device *dev;
1023 struct Qdisc *q;
1024 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1025 struct qdisc_dump_args arg;
1026
1027 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1028 return 0;
1029 if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
1030 return 0;
1031
1032 s_t = cb->args[0];
1033 t = 0;
1034
1035 read_lock(&qdisc_tree_lock);
1036 list_for_each_entry(q, &dev->qdisc_list, list) {
1037 if (t < s_t || !q->ops->cl_ops ||
1038 (tcm->tcm_parent &&
1039 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1040 t++;
1041 continue;
1042 }
1043 if (t > s_t)
1044 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1045 arg.w.fn = qdisc_class_dump;
1046 arg.skb = skb;
1047 arg.cb = cb;
1048 arg.w.stop = 0;
1049 arg.w.skip = cb->args[1];
1050 arg.w.count = 0;
1051 q->ops->cl_ops->walk(q, &arg.w);
1052 cb->args[1] = arg.w.count;
1053 if (arg.w.stop)
1054 break;
1055 t++;
1056 }
1057 read_unlock(&qdisc_tree_lock);
1058
1059 cb->args[0] = t;
1060
1061 dev_put(dev);
1062 return skb->len;
1063}
1064
/*
 * Scheduler clock conversion factors.  Both default to 1; they are set by
 * psched_calibrate_clock() or pktsched_init() depending on
 * PSCHED_CLOCK_SOURCE, and are printed by psched_read_proc().
 */
int psched_us_per_tick = 1;
int psched_tick_per_us = 1;
1067
#ifdef CONFIG_PROC_FS
/*
 * Read handler for the "net/psched" proc entry.
 *
 * Formats four 8-digit hex words into @buffer: psched_tick_per_us,
 * psched_us_per_tick, 1000000 and HZ.  The returned length is the formatted
 * length minus @offset, clamped to the range [0, @length]; *@start is set to
 * @buffer + @offset and *@eof is always set to 1.
 */
static int psched_read_proc(char *buffer, char **start, off_t offset,
			    int length, int *eof, void *data)
{
	int avail;

	avail = sprintf(buffer, "%08x %08x %08x %08x\n",
			psched_tick_per_us, psched_us_per_tick,
			1000000, HZ);

	*start = buffer + offset;
	*eof = 1;

	/* Account for what the reader already consumed, then clamp. */
	avail -= offset;
	if (avail > length)
		avail = length;
	if (avail < 0)
		avail = 0;

	return avail;
}
#endif
1091
#if PSCHED_CLOCK_SOURCE == PSCHED_GETTIMEOFDAY
/*
 * Convert a difference of whole seconds into microseconds, saturating at
 * @bound.
 *
 * @bound is returned unchanged when it is <= 1000000 or when @delta_sec is
 * large enough (> 0x7FFFFFFF/1000000 - 1) that the multiplication could not
 * be represented safely.  Otherwise the result is
 * min(@delta_sec * 1000000, @bound).
 */
int psched_tod_diff(int delta_sec, int bound)
{
	int usecs;

	if (bound <= 1000000)
		return bound;
	if (delta_sec > (0x7FFFFFFF/1000000)-1)
		return bound;

	usecs = delta_sec * 1000000;
	return usecs > bound ? bound : usecs;
}
#endif
1105
/*
 * Base value of the scheduler clock.  In this file it is advanced by
 * psched_tick() when the clock source is not PSCHED_CPU.
 */
psched_time_t psched_time_base;

#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
/* Both are computed by psched_calibrate_clock(). */
psched_tdiff_t psched_clock_per_hz;
int psched_clock_scale;
#endif
1112
#ifdef PSCHED_WATCHER
/* Jiffies value recorded at the last psched_tick() (non-CPU clock source). */
PSCHED_WATCHER psched_time_mark;

static void psched_tick(unsigned long);

/* Self-rearming timer that drives psched_tick(). */
static struct timer_list psched_timer =
	{ function: psched_tick };

/*
 * Periodic watcher for the scheduler clock.
 *
 * With the CPU clock source it samples the clock through PSCHED_GET_TIME and
 * re-arms itself for one second later.  Otherwise it folds the jiffies
 * elapsed since psched_time_mark (shifted left by PSCHED_JSCALE) into
 * psched_time_base and re-arms itself for one hour later.
 */
static void psched_tick(unsigned long dummy)
{
#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
	psched_time_t dummy_stamp;

	/* Only the side effect of sampling the clock is wanted here. */
	PSCHED_GET_TIME(dummy_stamp);
	psched_timer.expires = jiffies + 1*HZ;
#else
	unsigned long now = jiffies;
	u64 elapsed = (u64)(now-psched_time_mark);

	psched_time_base += elapsed<<PSCHED_JSCALE;
	psched_time_mark = now;
	psched_timer.expires = now + 60*60*HZ;
#endif
	add_timer(&psched_timer);
}
#endif
1137
1138#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1139int __init psched_calibrate_clock(void)
1140{
1141 psched_time_t stamp, stamp1;
1142 struct timeval tv, tv1;
1143 psched_tdiff_t delay;
1144 long rdelay;
1145 unsigned long stop;
1146
1147#ifdef PSCHED_WATCHER
1148 psched_tick(0);
1149#endif
1150 stop = jiffies + HZ/10;
1151 PSCHED_GET_TIME(stamp);
1152 do_gettimeofday(&tv);
1153 while (time_before(jiffies, stop)) {
1154 barrier();
1155 cpu_relax();
1156 }
1157 PSCHED_GET_TIME(stamp1);
1158 do_gettimeofday(&tv1);
1159
1160 delay = PSCHED_TDIFF(stamp1, stamp);
1161 rdelay = tv1.tv_usec - tv.tv_usec;
1162 rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
1163 if (rdelay > delay)
1164 return -1;
1165 delay /= rdelay;
1166 psched_tick_per_us = delay;
1167 while ((delay>>=1) != 0)
1168 psched_clock_scale++;
1169 psched_us_per_tick = 1<<psched_clock_scale;
1170 psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
1171 return 0;
1172}
1173#endif
1174
1175int __init pktsched_init(void)
1176{
1177 struct rtnetlink_link *link_p;
1178
1179#if PSCHED_CLOCK_SOURCE == PSCHED_CPU
1180 if (psched_calibrate_clock() < 0)
1181 return -1;
1182#elif PSCHED_CLOCK_SOURCE == PSCHED_JIFFIES
1183 psched_tick_per_us = HZ<<PSCHED_JSCALE;
1184 psched_us_per_tick = 1000000;
1185#ifdef PSCHED_WATCHER
1186 psched_tick(0);
1187#endif
1188#endif
1189
1190 link_p = rtnetlink_links[PF_UNSPEC];
1191
1192
1193
1194
1195
1196 if (link_p) {
1197 link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
1198 link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
1199 link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
1200 link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
1201 link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
1202 link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
1203 link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
1204 link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
1205 }
1206
1207#define INIT_QDISC(name) { \
1208 extern struct Qdisc_ops name##_qdisc_ops; \
1209 register_qdisc(& name##_qdisc_ops); \
1210 }
1211
1212 INIT_QDISC(pfifo);
1213 INIT_QDISC(bfifo);
1214
1215#ifdef CONFIG_NET_SCH_CBQ
1216 INIT_QDISC(cbq);
1217#endif
1218#ifdef CONFIG_NET_SCH_HTB
1219 INIT_QDISC(htb);
1220#endif
1221#ifdef CONFIG_NET_SCH_CSZ
1222 INIT_QDISC(csz);
1223#endif
1224#ifdef CONFIG_NET_SCH_HPFQ
1225 INIT_QDISC(hpfq);
1226#endif
1227#ifdef CONFIG_NET_SCH_HFSC
1228 INIT_QDISC(hfsc);
1229#endif
1230#ifdef CONFIG_NET_SCH_RED
1231 INIT_QDISC(red);
1232#endif
1233#ifdef CONFIG_NET_SCH_GRED
1234 INIT_QDISC(gred);
1235#endif
1236#ifdef CONFIG_NET_SCH_INGRESS
1237 INIT_QDISC(ingress);
1238#endif
1239#ifdef CONFIG_NET_SCH_DSMARK
1240 INIT_QDISC(dsmark);
1241#endif
1242#ifdef CONFIG_NET_SCH_SFQ
1243 INIT_QDISC(sfq);
1244#endif
1245#ifdef CONFIG_NET_SCH_TBF
1246 INIT_QDISC(tbf);
1247#endif
1248#ifdef CONFIG_NET_SCH_TEQL
1249 teql_init();
1250#endif
1251#ifdef CONFIG_NET_SCH_PRIO
1252 INIT_QDISC(prio);
1253#endif
1254#ifdef CONFIG_NET_SCH_ATM
1255 INIT_QDISC(atm);
1256#endif
1257#ifdef CONFIG_NET_CLS
1258 tc_filter_init();
1259#endif
1260
1261#ifdef CONFIG_PROC_FS
1262 create_proc_read_entry("net/psched", 0, 0, psched_read_proc, NULL);
1263#endif
1264
1265 return 0;
1266}
1267