linux-bk/net/sched/sch_gred.c
<<
>>
Prefs
   1/*
   2 * net/sched/sch_gred.c Generic Random Early Detection queue.
   3 *
   4 *
   5 *              This program is free software; you can redistribute it and/or
   6 *              modify it under the terms of the GNU General Public License
   7 *              as published by the Free Software Foundation; either version
   8 *              2 of the License, or (at your option) any later version.
   9 *
  10 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
  11 *
  12 *             991129: -  Bug fix with grio mode
  13 *                     - a better sing. AvgQ mode with Grio(WRED)
  14 *                     - A finer grained VQ dequeue based on sugestion
  15 *                       from Ren Liu
  16 *                     - More error checks
  17 *
  18 *
  19 *
  20 *  For all the glorious comments look at Alexey's sch_red.c
  21 */
  22
  23#include <linux/config.h>
  24#include <linux/module.h>
  25#include <asm/uaccess.h>
  26#include <asm/system.h>
  27#include <linux/bitops.h>
  28#include <linux/types.h>
  29#include <linux/kernel.h>
  30#include <linux/sched.h>
  31#include <linux/string.h>
  32#include <linux/mm.h>
  33#include <linux/socket.h>
  34#include <linux/sockios.h>
  35#include <linux/in.h>
  36#include <linux/errno.h>
  37#include <linux/interrupt.h>
  38#include <linux/if_ether.h>
  39#include <linux/inet.h>
  40#include <linux/netdevice.h>
  41#include <linux/etherdevice.h>
  42#include <linux/notifier.h>
  43#include <net/ip.h>
  44#include <net/route.h>
  45#include <linux/skbuff.h>
  46#include <net/sock.h>
  47#include <net/pkt_sched.h>
  48
  49#if 1 /* control */
  50#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
  51#else
  52#define DPRINTK(format,args...)
  53#endif
  54
  55#if 0 /* data */
  56#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
  57#else
  58#define D2PRINTK(format,args...)
  59#endif
  60
  61struct gred_sched_data;
  62struct gred_sched;
  63
  64struct gred_sched_data
  65{
  66/* Parameters */
  67        u32             limit;          /* HARD maximal queue length    */
  68        u32             qth_min;        /* Min average length threshold: A scaled */
  69        u32             qth_max;        /* Max average length threshold: A scaled */
  70        u32             DP;             /* the drop pramaters */
  71        char            Wlog;           /* log(W)               */
  72        char            Plog;           /* random number bits   */
  73        u32             Scell_max;
  74        u32             Rmask;
  75        u32             bytesin;        /* bytes seen on virtualQ so far*/
  76        u32             packetsin;      /* packets seen on virtualQ so far*/
  77        u32             backlog;        /* bytes on the virtualQ */
  78        u32             forced; /* packets dropped for exceeding limits */
  79        u32             early;  /* packets dropped as a warning */
  80        u32             other;  /* packets dropped by invoking drop() */
  81        u32             pdrop;  /* packets dropped because we exceeded physical queue limits */
  82        char            Scell_log;
  83        u8              Stab[256];
  84        u8              prio;        /* the prio of this vq */
  85
  86/* Variables */
  87        unsigned long   qave;           /* Average queue length: A scaled */
  88        int             qcount;         /* Packets since last random number generation */
  89        u32             qR;             /* Cached random number */
  90
  91        psched_time_t   qidlestart;     /* Start of idle period */
  92};
  93
  94struct gred_sched
  95{
  96        struct gred_sched_data *tab[MAX_DPs];
  97        u32             DPs;   
  98        u32             def; 
  99        u8              initd; 
 100        u8              grio; 
 101        u8              eqp; 
 102};
 103
 104static int
 105gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 106{
 107        psched_time_t now;
 108        struct gred_sched_data *q=NULL;
 109        struct gred_sched *t= qdisc_priv(sch);
 110        unsigned long   qave=0; 
 111        int i=0;
 112
 113        if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
 114                D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
 115                goto do_enqueue;
 116        }
 117
 118
 119        if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
 120                printk("GRED: setting to default (%d)\n ",t->def);
 121                if (!(q=t->tab[t->def])) {
 122                        DPRINTK("GRED: setting to default FAILED! dropping!! "
 123                            "(%d)\n ", t->def);
 124                        goto drop;
 125                }
 126                /* fix tc_index? --could be controvesial but needed for
 127                   requeueing */
 128                skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
 129        }
 130
 131        D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
 132            "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
 133            sch->qstats.backlog);
 134        /* sum up all the qaves of prios <= to ours to get the new qave*/
 135        if (!t->eqp && t->grio) {
 136                for (i=0;i<t->DPs;i++) {
 137                        if ((!t->tab[i]) || (i==q->DP)) 
 138                                continue; 
 139                                
 140                        if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
 141                                qave +=t->tab[i]->qave;
 142                }
 143                        
 144        }
 145
 146        q->packetsin++;
 147        q->bytesin+=skb->len;
 148
 149        if (t->eqp && t->grio) {
 150                qave=0;
 151                q->qave=t->tab[t->def]->qave;
 152                q->qidlestart=t->tab[t->def]->qidlestart;
 153        }
 154
 155        if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
 156                long us_idle;
 157                PSCHED_GET_TIME(now);
 158                us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
 159                PSCHED_SET_PASTPERFECT(q->qidlestart);
 160
 161                q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
 162        } else {
 163                if (t->eqp) {
 164                        q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
 165                } else {
 166                        q->qave += q->backlog - (q->qave >> q->Wlog);
 167                }
 168
 169        }
 170        
 171
 172        if (t->eqp && t->grio) 
 173                t->tab[t->def]->qave=q->qave;
 174
 175        if ((q->qave+qave) < q->qth_min) {
 176                q->qcount = -1;
 177enqueue:
 178                if (q->backlog + skb->len <= q->limit) {
 179                        q->backlog += skb->len;
 180do_enqueue:
 181                        __skb_queue_tail(&sch->q, skb);
 182                        sch->qstats.backlog += skb->len;
 183                        sch->bstats.bytes += skb->len;
 184                        sch->bstats.packets++;
 185                        return 0;
 186                } else {
 187                        q->pdrop++;
 188                }
 189
 190drop:
 191                kfree_skb(skb);
 192                sch->qstats.drops++;
 193                return NET_XMIT_DROP;
 194        }
 195        if ((q->qave+qave) >= q->qth_max) {
 196                q->qcount = -1;
 197                sch->qstats.overlimits++;
 198                q->forced++;
 199                goto drop;
 200        }
 201        if (++q->qcount) {
 202                if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
 203                        goto enqueue;
 204                q->qcount = 0;
 205                q->qR = net_random()&q->Rmask;
 206                sch->qstats.overlimits++;
 207                q->early++;
 208                goto drop;
 209        }
 210        q->qR = net_random()&q->Rmask;
 211        goto enqueue;
 212}
 213
 214static int
 215gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
 216{
 217        struct gred_sched_data *q;
 218        struct gred_sched *t= qdisc_priv(sch);
 219        q= t->tab[(skb->tc_index&0xf)];
 220/* error checking here -- probably unnecessary */
 221        PSCHED_SET_PASTPERFECT(q->qidlestart);
 222
 223        __skb_queue_head(&sch->q, skb);
 224        sch->qstats.backlog += skb->len;
 225        sch->qstats.requeues++;
 226        q->backlog += skb->len;
 227        return 0;
 228}
 229
 230static struct sk_buff *
 231gred_dequeue(struct Qdisc* sch)
 232{
 233        struct sk_buff *skb;
 234        struct gred_sched_data *q;
 235        struct gred_sched *t= qdisc_priv(sch);
 236
 237        skb = __skb_dequeue(&sch->q);
 238        if (skb) {
 239                sch->qstats.backlog -= skb->len;
 240                q= t->tab[(skb->tc_index&0xf)];
 241                if (q) {
 242                        q->backlog -= skb->len;
 243                        if (!q->backlog && !t->eqp)
 244                                PSCHED_GET_TIME(q->qidlestart);
 245                } else {
 246                        D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
 247                }
 248                return skb;
 249        }
 250
 251        if (t->eqp) {
 252                        q= t->tab[t->def];
 253                        if (!q) 
 254                                D2PRINTK("no default VQ set: Results will be "
 255                                       "screwed up\n");
 256                        else
 257                                PSCHED_GET_TIME(q->qidlestart);
 258        }
 259
 260        return NULL;
 261}
 262
 263static unsigned int gred_drop(struct Qdisc* sch)
 264{
 265        struct sk_buff *skb;
 266
 267        struct gred_sched_data *q;
 268        struct gred_sched *t= qdisc_priv(sch);
 269
 270        skb = __skb_dequeue_tail(&sch->q);
 271        if (skb) {
 272                unsigned int len = skb->len;
 273                sch->qstats.backlog -= len;
 274                sch->qstats.drops++;
 275                q= t->tab[(skb->tc_index&0xf)];
 276                if (q) {
 277                        q->backlog -= len;
 278                        q->other++;
 279                        if (!q->backlog && !t->eqp)
 280                                PSCHED_GET_TIME(q->qidlestart);
 281                } else {
 282                        D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf); 
 283                }
 284
 285                kfree_skb(skb);
 286                return len;
 287        }
 288
 289        q=t->tab[t->def];
 290        if (!q) {
 291                D2PRINTK("no default VQ set: Results might be screwed up\n");
 292                return 0;
 293        }
 294
 295        PSCHED_GET_TIME(q->qidlestart);
 296        return 0;
 297
 298}
 299
 300static void gred_reset(struct Qdisc* sch)
 301{
 302        int i;
 303        struct gred_sched_data *q;
 304        struct gred_sched *t= qdisc_priv(sch);
 305
 306        __skb_queue_purge(&sch->q);
 307
 308        sch->qstats.backlog = 0;
 309
 310        for (i=0;i<t->DPs;i++) {
 311                q= t->tab[i];
 312                if (!q) 
 313                        continue; 
 314                PSCHED_SET_PASTPERFECT(q->qidlestart);
 315                q->qave = 0;
 316                q->qcount = -1;
 317                q->backlog = 0;
 318                q->other=0;
 319                q->forced=0;
 320                q->pdrop=0;
 321                q->early=0;
 322        }
 323}
 324
 325static int gred_change(struct Qdisc *sch, struct rtattr *opt)
 326{
 327        struct gred_sched *table = qdisc_priv(sch);
 328        struct gred_sched_data *q;
 329        struct tc_gred_qopt *ctl;
 330        struct tc_gred_sopt *sopt;
 331        struct rtattr *tb[TCA_GRED_STAB];
 332        struct rtattr *tb2[TCA_GRED_DPS];
 333        int i;
 334
 335        if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
 336                return -EINVAL;
 337
 338        if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
 339                rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
 340
 341            if (tb2[TCA_GRED_DPS-1] == 0) 
 342                        return -EINVAL;
 343
 344                sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
 345                table->DPs=sopt->DPs;   
 346                table->def=sopt->def_DP; 
 347                table->grio=sopt->grio; 
 348                table->initd=0;
 349                /* probably need to clear all the table DP entries as well */
 350                return 0;
 351            }
 352
 353
 354        if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
 355                RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
 356                RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
 357                        return -EINVAL;
 358
 359        ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
 360        if (ctl->DP > MAX_DPs-1 ) {
 361                /* misbehaving is punished! Put in the default drop probability */
 362                DPRINTK("\nGRED: DP %u not in  the proper range fixed. New DP "
 363                        "set to default at %d\n",ctl->DP,table->def);
 364                ctl->DP=table->def;
 365        }
 366        
 367        if (table->tab[ctl->DP] == NULL) {
 368                table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
 369                                            GFP_KERNEL);
 370                if (NULL == table->tab[ctl->DP])
 371                        return -ENOMEM;
 372                memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
 373        }
 374        q= table->tab[ctl->DP]; 
 375
 376        if (table->grio) {
 377                if (ctl->prio <=0) {
 378                        if (table->def && table->tab[table->def]) {
 379                                DPRINTK("\nGRED: DP %u does not have a prio"
 380                                        "setting default to %d\n",ctl->DP,
 381                                        table->tab[table->def]->prio);
 382                                q->prio=table->tab[table->def]->prio;
 383                        } else { 
 384                                DPRINTK("\nGRED: DP %u does not have a prio"
 385                                        " setting default to 8\n",ctl->DP);
 386                                q->prio=8;
 387                        }
 388                } else {
 389                        q->prio=ctl->prio;
 390                }
 391        } else {
 392                q->prio=8;
 393        }
 394
 395
 396        q->DP=ctl->DP;
 397        q->Wlog = ctl->Wlog;
 398        q->Plog = ctl->Plog;
 399        q->limit = ctl->limit;
 400        q->Scell_log = ctl->Scell_log;
 401        q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
 402        q->Scell_max = (255<<q->Scell_log);
 403        q->qth_min = ctl->qth_min<<ctl->Wlog;
 404        q->qth_max = ctl->qth_max<<ctl->Wlog;
 405        q->qave=0;
 406        q->backlog=0;
 407        q->qcount = -1;
 408        q->other=0;
 409        q->forced=0;
 410        q->pdrop=0;
 411        q->early=0;
 412
 413        PSCHED_SET_PASTPERFECT(q->qidlestart);
 414        memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
 415
 416        if ( table->initd && table->grio) {
 417        /* this looks ugly but it's not in the fast path */
 418                for (i=0;i<table->DPs;i++) {
 419                        if ((!table->tab[i]) || (i==q->DP) )    
 420                                continue; 
 421                        if (table->tab[i]->prio == q->prio ){
 422                                /* WRED mode detected */
 423                                table->eqp=1;
 424                                break;
 425                        }
 426                }
 427        }
 428
 429        if (!table->initd) {
 430                table->initd=1;
 431                /* 
 432                the first entry also goes into the default until
 433                over-written 
 434                */
 435
 436                if (table->tab[table->def] == NULL) {
 437                        table->tab[table->def]=
 438                                kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
 439                        if (NULL == table->tab[table->def])
 440                                return -ENOMEM;
 441
 442                        memset(table->tab[table->def], 0,
 443                               (sizeof(struct gred_sched_data)));
 444                }
 445                q= table->tab[table->def]; 
 446                q->DP=table->def;
 447                q->Wlog = ctl->Wlog;
 448                q->Plog = ctl->Plog;
 449                q->limit = ctl->limit;
 450                q->Scell_log = ctl->Scell_log;
 451                q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
 452                q->Scell_max = (255<<q->Scell_log);
 453                q->qth_min = ctl->qth_min<<ctl->Wlog;
 454                q->qth_max = ctl->qth_max<<ctl->Wlog;
 455
 456                if (table->grio)
 457                        q->prio=table->tab[ctl->DP]->prio;
 458                else
 459                        q->prio=8;
 460
 461                q->qcount = -1;
 462                PSCHED_SET_PASTPERFECT(q->qidlestart);
 463                memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
 464        }
 465        return 0;
 466
 467}
 468
 469static int gred_init(struct Qdisc *sch, struct rtattr *opt)
 470{
 471        struct gred_sched *table = qdisc_priv(sch);
 472        struct tc_gred_sopt *sopt;
 473        struct rtattr *tb[TCA_GRED_STAB];
 474        struct rtattr *tb2[TCA_GRED_DPS];
 475
 476        if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
 477                return -EINVAL;
 478
 479        if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
 480                rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
 481
 482            if (tb2[TCA_GRED_DPS-1] == 0) 
 483                        return -EINVAL;
 484
 485                sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
 486                table->DPs=sopt->DPs;   
 487                table->def=sopt->def_DP; 
 488                table->grio=sopt->grio; 
 489                table->initd=0;
 490                return 0;
 491        }
 492
 493        DPRINTK("\n GRED_INIT error!\n");
 494        return -EINVAL;
 495}
 496
 497static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 498{
 499        unsigned long qave;
 500        struct rtattr *rta;
 501        struct tc_gred_qopt *opt = NULL ;
 502        struct tc_gred_qopt *dst;
 503        struct gred_sched *table = qdisc_priv(sch);
 504        struct gred_sched_data *q;
 505        int i;
 506        unsigned char    *b = skb->tail;
 507
 508        rta = (struct rtattr*)b;
 509        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 510
 511        opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
 512
 513        if (opt  == NULL) {
 514                DPRINTK("gred_dump:failed to malloc for %Zd\n",
 515                    sizeof(struct tc_gred_qopt)*MAX_DPs);
 516                goto rtattr_failure;
 517        }
 518
 519        memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
 520
 521        if (!table->initd) {
 522                DPRINTK("NO GRED Queues setup!\n");
 523        }
 524
 525        for (i=0;i<MAX_DPs;i++) {
 526                dst= &opt[i]; 
 527                q= table->tab[i]; 
 528
 529                if (!q) {
 530                        /* hack -- fix at some point with proper message
 531                           This is how we indicate to tc that there is no VQ
 532                           at this DP */
 533
 534                        dst->DP=MAX_DPs+i;
 535                        continue;
 536                }
 537
 538                dst->limit=q->limit;
 539                dst->qth_min=q->qth_min>>q->Wlog;
 540                dst->qth_max=q->qth_max>>q->Wlog;
 541                dst->DP=q->DP;
 542                dst->backlog=q->backlog;
 543                if (q->qave) {
 544                        if (table->eqp && table->grio) {
 545                                q->qidlestart=table->tab[table->def]->qidlestart;
 546                                q->qave=table->tab[table->def]->qave;
 547                        }
 548                        if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
 549                                long idle;
 550                                psched_time_t now;
 551                                PSCHED_GET_TIME(now);
 552                                idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
 553                                qave  = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
 554                                dst->qave = qave >> q->Wlog;
 555
 556                        } else {
 557                                dst->qave = q->qave >> q->Wlog;
 558                        }
 559                } else {
 560                        dst->qave = 0;
 561                }
 562                
 563
 564                dst->Wlog = q->Wlog;
 565                dst->Plog = q->Plog;
 566                dst->Scell_log = q->Scell_log;
 567                dst->other = q->other;
 568                dst->forced = q->forced;
 569                dst->early = q->early;
 570                dst->pdrop = q->pdrop;
 571                dst->prio = q->prio;
 572                dst->packets=q->packetsin;
 573                dst->bytesin=q->bytesin;
 574        }
 575
 576        RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
 577        rta->rta_len = skb->tail - b;
 578
 579        kfree(opt);
 580        return skb->len;
 581
 582rtattr_failure:
 583        if (opt)
 584                kfree(opt);
 585        DPRINTK("gred_dump: FAILURE!!!!\n");
 586
 587/* also free the opt struct here */
 588        skb_trim(skb, b - skb->data);
 589        return -1;
 590}
 591
 592static void gred_destroy(struct Qdisc *sch)
 593{
 594        struct gred_sched *table = qdisc_priv(sch);
 595        int i;
 596
 597        for (i = 0;i < table->DPs; i++) {
 598                if (table->tab[i])
 599                        kfree(table->tab[i]);
 600        }
 601}
 602
 603static struct Qdisc_ops gred_qdisc_ops = {
 604        .next           =       NULL,
 605        .cl_ops         =       NULL,
 606        .id             =       "gred",
 607        .priv_size      =       sizeof(struct gred_sched),
 608        .enqueue        =       gred_enqueue,
 609        .dequeue        =       gred_dequeue,
 610        .requeue        =       gred_requeue,
 611        .drop           =       gred_drop,
 612        .init           =       gred_init,
 613        .reset          =       gred_reset,
 614        .destroy        =       gred_destroy,
 615        .change         =       gred_change,
 616        .dump           =       gred_dump,
 617        .owner          =       THIS_MODULE,
 618};
 619
 620static int __init gred_module_init(void)
 621{
 622        return register_qdisc(&gred_qdisc_ops);
 623}
 624static void __exit gred_module_exit(void) 
 625{
 626        unregister_qdisc(&gred_qdisc_ops);
 627}
 628module_init(gred_module_init)
 629module_exit(gred_module_exit)
 630MODULE_LICENSE("GPL");
 631
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.