linux/net/sched/sch_netem.c
/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"
/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full-blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either, since that can be handled by using a token
	 bucket or other rate control.

     Correlated Loss Generator models

	Added generation of correlated loss according to a 4-state
	Markov chain and the "Gilbert-Elliot" model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
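
/* Example usage (illustrative only, not part of this file): the qdisc is
 * normally configured from user space with the tc tool from iproute2.
 * A sketch of an assumed setup, combining delay with jitter and
 * correlation, random loss and duplication:
 *
 *	tc qdisc add dev eth0 root handle 1: netem delay 100ms 10ms 25% \
 *		loss 0.5% duplicate 1%
 *
 * Bandwidth control is intentionally left to other disciplines (see the
 * comment above); one common, assumed arrangement layers a token bucket
 * filter below netem:
 *
 *	tc qdisc add dev eth0 parent 1:1 handle 10: tbf rate 1mbit \
 *		burst 32kbit latency 400ms
 */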

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	psched_tdiff_t latency;
	psched_tdiff_t jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 rate;
	s32 packet_overhead;
	u32 cell_size;
	u32 cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
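
/* The update above is a fixed-point convex combination:
 *
 *	answer = ((2^32 - rho) * U + rho * last) >> 32
 *	       ~= (1 - p) * U + p * last,  where p = rho / 2^32
 *
 * so rho is the correlation coefficient scaled to 32 bits.
 * Worked example (numbers assumed for illustration): with
 * rho = 0x40000000 (p ~= 0.25), last = 1000 and a fresh uniform sample
 * U = 2000, the next value is about 0.75 * 2000 + 0.25 * 1000 = 1750,
 * i.e. each output is pulled towards the previous one.
 */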

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = net_random();

	/*
	 * Compare rnd with the transition probabilities out of the
	 * current state, then decide the next state and whether the
	 * next packet is transmitted or lost.
	 * The four states correspond to:
	 *   1 => successfully transmitted packets within a gap period
	 *   4 => isolated losses within a gap period
	 *   3 => lost packets within a burst period
	 *   2 => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case 1:
		if (rnd < clg->a4) {
			clg->state = 4;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1) {
			clg->state = 3;
			return true;
		} else if (clg->a1 < rnd)
			clg->state = 1;

		break;
	case 2:
		if (rnd < clg->a5) {
			clg->state = 3;
			return true;
		} else
			clg->state = 2;

		break;
	case 3:
		if (rnd < clg->a3)
			clg->state = 2;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = 1;
			return true;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = 3;
			return true;
		}
		break;
	case 4:
		clg->state = 1;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert).
 *
 * A random number is compared with the transition probabilities out of
 * the current state to decide the next state. A second random number
 * is compared with the loss probability of the current state to decide
 * whether the next packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case 1:
		if (net_random() < clg->a1)
			clg->state = 2;
		if (net_random() < clg->a4)
			return true;
		break;	/* do not fall through into the bad-state handling */
	case 2:
		if (net_random() < clg->a2)
			clg->state = 1;
		if (clg->a3 > net_random())
			return true;
	}

	return false;
}

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model (also used for the GI model):
		 * ask the Markov 4-state loss generator whether the
		 * next packet has to be dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model:
		 * ask the Gilbert-Elliot loss generator whether the
		 * next packet has to be dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}
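
/* Parameter mapping used by the correlated loss models (see also
 * get_loss_clg() below).  The generic slots a1..a5 in struct clgstate
 * hold:
 *
 *		4-state (GI) model	Gilbert-Elliot model
 *	a1	p13			p   (good -> bad)
 *	a2	p31			r   (bad -> good)
 *	a3	p32			h
 *	a4	p14			1-k
 *	a5	p23			(unused)
 *
 * Assumed, illustrative user-space configuration (iproute2 keywords):
 *
 *	tc qdisc add dev eth0 root netem loss state 1% 10% 70% 0.1%
 *
 * and similarly "loss gemodel ..." selects the Gilbert-Elliot variant.
 */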


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
				struct crndstate *state,
				const struct disttable *dist)
{
	psched_tdiff_t x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
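
/* The arithmetic above evaluates, in effect,
 *
 *	mu + (sigma * t) / NETEM_DIST_SCALE
 *
 * while avoiding overflow of sigma * t: sigma is split into its quotient
 * and remainder modulo NETEM_DIST_SCALE (8192) and the two partial
 * products are scaled separately.  Worked example (numbers assumed for
 * illustration): with sigma = 10000 ticks and table entry t = 4096, the
 * remainder part contributes (10000 % 8192) * 4096 / 8192 ~= 904 and the
 * quotient part (10000 / 8192) * 4096 = 4096, giving mu + 5000, exactly
 * sigma * t / NETEM_DIST_SCALE.
 */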

static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
{
	u64 ticks;

	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	ticks = (u64)len * NSEC_PER_SEC;

	do_div(ticks, q->rate);
	return PSCHED_NS2TICKS(ticks);
}
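
/* packet_len_2_sched_time() converts a packet length into the time the
 * packet would occupy the emulated link: len is padded by the configured
 * per-packet overhead, optionally rounded up to whole cells (each cell
 * carrying its own overhead), and then divided by the rate in bytes per
 * second.  Worked example (numbers assumed for illustration): at
 * rate = 125000 bytes/s (1 Mbit/s) a 1500 byte packet takes
 * 1500 * NSEC_PER_SEC / 125000 = 12 ms; with cell_size = 48 and
 * cell_overhead = 5, a 100 byte packet is first expanded to
 * ceil(100 / 48) * (48 + 5) = 159 bytes.
 */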

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb = skb_peek_tail(list);

	/* Optimize for add at tail */
	if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
		return __skb_queue_tail(list, nskb);

	skb_queue_reverse_walk(list, skb) {
		if (tnext >= netem_skb_cb(skb)->time_to_send)
			break;
	}

	__skb_queue_after(list, skb, nskb);
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 * 	NET_XMIT_DROP: queue length didn't change.
 *      NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	int count = 1;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			sch->qstats.drops++; /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	skb_orphan(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		qdisc_enqueue_root(skb2, rootq);
		q->duplicate = dupsave;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do the checksum now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb)))
			return qdisc_drop(skb, sch);

		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
	}

	if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
		return qdisc_reshape_fail(skb, sch);

	sch->qstats.backlog += qdisc_pkt_len(skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		psched_tdiff_t delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = psched_get_time();

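		/* With rate emulation the send time is anchored to the
		 * tail of the tfifo (the most recently scheduled packet)
		 * rather than to the current time: this packet's link
		 * transmission time is added, and the interval already
		 * credited to the head packet is subtracted.  Assumed,
		 * illustrative numbers: if the tail packet is scheduled
		 * 30ms from now and this 1500 byte packet needs 12ms at
		 * the configured rate, its time_to_send is computed from
		 * that 30ms point plus 12ms (plus the random delay), so
		 * back-to-back packets do not overlap on the emulated
		 * link.
		 */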
		if (q->rate) {
			struct sk_buff_head *list = &sch->q;

			delay += packet_len_2_sched_time(skb->len, q);

			if (!skb_queue_empty(list)) {
				/*
				 * Last packet in queue is reference point (now).
				 * First packet in queue is already in flight,
				 * calculate this time bonus and subtract it
				 * from the delay.
				 */
				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
			}
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = psched_get_time();
		q->counter = 0;

		__skb_queue_head(&sch->q, skb);
		sch->qstats.requeues++;
	}

	return NET_XMIT_SUCCESS;
}

static unsigned int netem_drop(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	len = qdisc_queue_drop(sch);
	if (!len && q->qdisc && q->qdisc->ops->drop)
	    len = q->qdisc->ops->drop(q->qdisc);
	if (len)
		sch->qstats.drops++;

	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	if (qdisc_is_throttled(sch))
		return NULL;

tfifo_dequeue:
	skb = qdisc_peek_head(sch);
	if (skb) {
		const struct netem_skb_cb *cb = netem_skb_cb(skb);

		/* has the scheduled send time arrived? */
		if (cb->time_to_send <= psched_get_time()) {
			__skb_unlink(skb, &sch->q);
			sch->qstats.backlog -= qdisc_pkt_len(skb);

#ifdef CONFIG_NET_CLS_ACT
			/*
			 * If it's at ingress let's pretend the delay is
			 * from the network (tstamp will be updated).
			 */
			if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
				skb->tstamp.tv64 = 0;
#endif

			if (q->qdisc) {
				int err = qdisc_enqueue(skb, q->qdisc);

				if (unlikely(err != NET_XMIT_SUCCESS)) {
					if (net_xmit_drop_count(err)) {
						sch->qstats.drops++;
						qdisc_tree_decrease_qlen(sch, 1);
					}
				}
				goto tfifo_dequeue;
			}
deliver:
			qdisc_unthrottled(sch);
			qdisc_bstats_update(sch, skb);
			return skb;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}
		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	if (d) {
		if (is_vmalloc_addr(d))
			vfree(d);
		else
			kfree(d);
	}
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;
	size_t s;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	s = sizeof(struct disttable) + n * sizeof(s16);
	d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
	if (!d)
		d = vmalloc(s);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(q->delay_dist, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}
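
/* The delay distribution table is generated entirely in user space;
 * iproute2 ships pre-computed tables (e.g. normal, pareto, paretonormal,
 * typically installed under /usr/lib/tc/*.dist) plus the maketable/stats
 * tools to build new ones from measured delays.  An assumed, illustrative
 * invocation:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms distribution normal
 *
 * Each signed 16 bit entry is, roughly, a deviation from the mean in
 * units of sigma / NETEM_DIST_SCALE; tabledist() above turns it into
 * mu + sigma * t / NETEM_DIST_SCALE.
 */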

static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	q->cell_overhead = r->cell_overhead;
}

static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch(type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = 1;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = 1;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
};

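/* netem's TCA_OPTIONS attribute is not a conventional nested attribute:
 * it starts with a struct tc_netem_qopt (the legacy fixed-size options),
 * which may be followed directly by the nested attributes listed in
 * netem_policy above.  parse_attr() therefore skips the aligned struct
 * before handing the remainder to nla_parse().  Layout sketch:
 *
 *	[ TCA_OPTIONS | tc_netem_qopt | TCA_NETEM_CORR | TCA_NETEM_RATE | ... ]
 */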
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	sch->limit = qopt->limit;

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	if (*old) {
		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
		qdisc_reset(*old);
	}
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");