linux/net/sched/sch_netem.c
<<
>>
Prefs
   1/*
   2 * net/sched/sch_netem.c        Network emulator
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License.
   8 *
   9 *              Many of the algorithms and ideas for this came from
  10 *              NIST Net which is not copyrighted.
  11 *
  12 * Authors:     Stephen Hemminger <shemminger@osdl.org>
  13 *              Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
  14 */
  15
  16#include <linux/mm.h>
  17#include <linux/module.h>
  18#include <linux/slab.h>
  19#include <linux/types.h>
  20#include <linux/kernel.h>
  21#include <linux/errno.h>
  22#include <linux/skbuff.h>
  23#include <linux/vmalloc.h>
  24#include <linux/rtnetlink.h>
  25#include <linux/reciprocal_div.h>
  26
  27#include <net/netlink.h>
  28#include <net/pkt_sched.h>
  29
  30#define VERSION "1.3"
  31
  32/*      Network Emulation Queuing algorithm.
  33        ====================================
  34
   35        Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
   36                 Network Emulation Tool"
  37                 [2] Luigi Rizzo, DummyNet for FreeBSD
  38
  39         ----------------------------------------------------------------
  40
  41         This started out as a simple way to delay outgoing packets to
  42         test TCP but has grown to include most of the functionality
  43         of a full blown network emulator like NISTnet. It can delay
  44         packets and add random jitter (and correlation). The random
  45         distribution can be loaded from a table as well to provide
  46         normal, Pareto, or experimental curves. Packet loss,
  47         duplication, and reordering can also be emulated.
  48
  49         This qdisc does not do classification that can be handled in
  50         layering other disciplines.  It does not need to do bandwidth
  51         control either since that can be handled by using token
  52         bucket or other rate control.
  53
  54     Correlated Loss Generator models
  55
  56        Added generation of correlated loss according to the
  57        "Gilbert-Elliot" model, a 4-state markov model.
  58
  59        References:
  60        [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
  61        [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
  62        and intuitive loss model for packet networks and its implementation
  63        in the Netem module in the Linux kernel", available in [1]
  64
   65        Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
  66                 Fabio Ludovici <fabio.ludovici at yahoo.it>
  67*/
  68
  69struct netem_sched_data {
  70        /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
  71
  72        /* optional qdisc for classful handling (NULL at netem init) */
  73        struct Qdisc    *qdisc;
  74
  75        struct qdisc_watchdog watchdog;
  76
  77        psched_tdiff_t latency;
  78        psched_tdiff_t jitter;
  79
  80        u32 loss;
  81        u32 limit;
  82        u32 counter;
  83        u32 gap;
  84        u32 duplicate;
  85        u32 reorder;
  86        u32 corrupt;
  87        u32 rate;
  88        s32 packet_overhead;
  89        u32 cell_size;
  90        u32 cell_size_reciprocal;
  91        s32 cell_overhead;
  92
  93        struct crndstate {
  94                u32 last;
  95                u32 rho;
  96        } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
  97
  98        struct disttable {
  99                u32  size;
 100                s16 table[0];
 101        } *delay_dist;
 102
 103        enum  {
 104                CLG_RANDOM,
 105                CLG_4_STATES,
 106                CLG_GILB_ELL,
 107        } loss_model;
 108
 109        /* Correlated Loss Generation models */
 110        struct clgstate {
 111                /* state of the Markov chain */
 112                u8 state;
 113
 114                /* 4-states and Gilbert-Elliot models */
 115                u32 a1; /* p13 for 4-states or p for GE */
 116                u32 a2; /* p31 for 4-states or r for GE */
 117                u32 a3; /* p32 for 4-states or h for GE */
 118                u32 a4; /* p14 for 4-states or 1-k for GE */
 119                u32 a5; /* p23 used only in 4-states */
 120        } clg;
 121
 122};
 123
 124/* Time stamp put into socket buffer control block
 125 * Only valid when skbs are in our internal t(ime)fifo queue.
 126 */
 127struct netem_skb_cb {
 128        psched_time_t   time_to_send;
 129};
 130
 131static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
 132{
 133        qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
 134        return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
 135}
 136
 137/* init_crandom - initialize correlated random number generator
 138 * Use entropy source for initial seed.
 139 */
 140static void init_crandom(struct crndstate *state, unsigned long rho)
 141{
 142        state->rho = rho;
 143        state->last = net_random();
 144}
 145
 146/* get_crandom - correlated random number generator
 147 * Next number depends on last value.
 148 * rho is scaled to avoid floating point.
 149 */
 150static u32 get_crandom(struct crndstate *state)
 151{
 152        u64 value, rho;
 153        unsigned long answer;
 154
 155        if (state->rho == 0)    /* no correlation */
 156                return net_random();
 157
 158        value = net_random();
 159        rho = (u64)state->rho + 1;
 160        answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
 161        state->last = answer;
 162        return answer;
 163}
 164
 165/* loss_4state - 4-state model loss generator
 166 * Generates losses according to the 4-state Markov chain adopted in
 167 * the GI (General and Intuitive) loss model.
 168 */
 169static bool loss_4state(struct netem_sched_data *q)
 170{
 171        struct clgstate *clg = &q->clg;
 172        u32 rnd = net_random();
 173
 174        /*
 175         * Makes a comparison between rnd and the transition
 176         * probabilities outgoing from the current state, then decides the
 177         * next state and if the next packet has to be transmitted or lost.
 178         * The four states correspond to:
 179         *   1 => successfully transmitted packets within a gap period
 180         *   4 => isolated losses within a gap period
 181         *   3 => lost packets within a burst period
 182         *   2 => successfully transmitted packets within a burst period
 183         */
 184        switch (clg->state) {
 185        case 1:
 186                if (rnd < clg->a4) {
 187                        clg->state = 4;
 188                        return true;
 189                } else if (clg->a4 < rnd && rnd < clg->a1) {
 190                        clg->state = 3;
 191                        return true;
 192                } else if (clg->a1 < rnd)
 193                        clg->state = 1;
 194
 195                break;
 196        case 2:
 197                if (rnd < clg->a5) {
 198                        clg->state = 3;
 199                        return true;
 200                } else
 201                        clg->state = 2;
 202
 203                break;
 204        case 3:
 205                if (rnd < clg->a3)
 206                        clg->state = 2;
 207                else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
 208                        clg->state = 1;
 209                        return true;
 210                } else if (clg->a2 + clg->a3 < rnd) {
 211                        clg->state = 3;
 212                        return true;
 213                }
 214                break;
 215        case 4:
 216                clg->state = 1;
 217                break;
 218        }
 219
 220        return false;
 221}
 222
 223/* loss_gilb_ell - Gilbert-Elliot model loss generator
 224 * Generates losses according to the Gilbert-Elliot loss model or
 225 * its special cases  (Gilbert or Simple Gilbert)
 226 *
 227 * Makes a comparison between random number and the transition
 228 * probabilities outgoing from the current state, then decides the
 229 * next state. A second random number is extracted and the comparison
 230 * with the loss probability of the current state decides if the next
 231 * packet will be transmitted or lost.
 232 */
 233static bool loss_gilb_ell(struct netem_sched_data *q)
 234{
 235        struct clgstate *clg = &q->clg;
 236
 237        switch (clg->state) {
 238        case 1:
 239                if (net_random() < clg->a1)
 240                        clg->state = 2;
 241                if (net_random() < clg->a4)
 242                        return true;
 243        case 2:
 244                if (net_random() < clg->a2)
 245                        clg->state = 1;
 246                if (clg->a3 > net_random())
 247                        return true;
 248        }
 249
 250        return false;
 251}
 252
 253static bool loss_event(struct netem_sched_data *q)
 254{
 255        switch (q->loss_model) {
 256        case CLG_RANDOM:
 257                /* Random packet drop 0 => none, ~0 => all */
 258                return q->loss && q->loss >= get_crandom(&q->loss_cor);
 259
 260        case CLG_4_STATES:
 261                /* 4state loss model algorithm (used also for GI model)
 262                * Extracts a value from the markov 4 state loss generator,
 263                * if it is 1 drops a packet and if needed writes the event in
 264                * the kernel logs
 265                */
 266                return loss_4state(q);
 267
 268        case CLG_GILB_ELL:
 269                /* Gilbert-Elliot loss model algorithm
 270                * Extracts a value from the Gilbert-Elliot loss generator,
 271                * if it is 1 drops a packet and if needed writes the event in
 272                * the kernel logs
 273                */
 274                return loss_gilb_ell(q);
 275        }
 276
 277        return false;   /* not reached */
 278}
 279
 280
 281/* tabledist - return a pseudo-randomly distributed value with mean mu and
 282 * std deviation sigma.  Uses table lookup to approximate the desired
 283 * distribution, and a uniformly-distributed pseudo-random source.
 284 */
 285static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 286                                struct crndstate *state,
 287                                const struct disttable *dist)
 288{
 289        psched_tdiff_t x;
 290        long t;
 291        u32 rnd;
 292
 293        if (sigma == 0)
 294                return mu;
 295
 296        rnd = get_crandom(state);
 297
 298        /* default uniform distribution */
 299        if (dist == NULL)
 300                return (rnd % (2*sigma)) - sigma + mu;
 301
 302        t = dist->table[rnd % dist->size];
 303        x = (sigma % NETEM_DIST_SCALE) * t;
 304        if (x >= 0)
 305                x += NETEM_DIST_SCALE/2;
 306        else
 307                x -= NETEM_DIST_SCALE/2;
 308
 309        return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 310}
 311
 312static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
 313{
 314        u64 ticks;
 315
 316        len += q->packet_overhead;
 317
 318        if (q->cell_size) {
 319                u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
 320
 321                if (len > cells * q->cell_size) /* extra cell needed for remainder */
 322                        cells++;
 323                len = cells * (q->cell_size + q->cell_overhead);
 324        }
 325
 326        ticks = (u64)len * NSEC_PER_SEC;
 327
 328        do_div(ticks, q->rate);
 329        return PSCHED_NS2TICKS(ticks);
 330}
 331
 332static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 333{
 334        struct sk_buff_head *list = &sch->q;
 335        psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
 336        struct sk_buff *skb;
 337
 338        if (likely(skb_queue_len(list) < sch->limit)) {
 339                skb = skb_peek_tail(list);
 340                /* Optimize for add at tail */
 341                if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
 342                        return qdisc_enqueue_tail(nskb, sch);
 343
 344                skb_queue_reverse_walk(list, skb) {
 345                        if (tnext >= netem_skb_cb(skb)->time_to_send)
 346                                break;
 347                }
 348
 349                __skb_queue_after(list, skb, nskb);
 350                sch->qstats.backlog += qdisc_pkt_len(nskb);
 351                return NET_XMIT_SUCCESS;
 352        }
 353
 354        return qdisc_reshape_fail(nskb, sch);
 355}
 356
 357/*
 358 * Insert one skb into qdisc.
 359 * Note: parent depends on return value to account for queue length.
 360 *      NET_XMIT_DROP: queue length didn't change.
 361 *      NET_XMIT_SUCCESS: one skb was queued.
 362 */
 363static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 364{
 365        struct netem_sched_data *q = qdisc_priv(sch);
 366        /* We don't fill cb now as skb_unshare() may invalidate it */
 367        struct netem_skb_cb *cb;
 368        struct sk_buff *skb2;
 369        int ret;
 370        int count = 1;
 371
 372        /* Random duplication */
 373        if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
 374                ++count;
 375
 376        /* Drop packet? */
 377        if (loss_event(q))
 378                --count;
 379
 380        if (count == 0) {
 381                sch->qstats.drops++;
 382                kfree_skb(skb);
 383                return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 384        }
 385
 386        skb_orphan(skb);
 387
 388        /*
 389         * If we need to duplicate packet, then re-insert at top of the
 390         * qdisc tree, since parent queuer expects that only one
 391         * skb will be queued.
 392         */
 393        if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
 394                struct Qdisc *rootq = qdisc_root(sch);
 395                u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 396                q->duplicate = 0;
 397
 398                qdisc_enqueue_root(skb2, rootq);
 399                q->duplicate = dupsave;
 400        }
 401
 402        /*
 403         * Randomized packet corruption.
 404         * Make copy if needed since we are modifying
 405         * If packet is going to be hardware checksummed, then
 406         * do it now in software before we mangle it.
 407         */
 408        if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
 409                if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
 410                    (skb->ip_summed == CHECKSUM_PARTIAL &&
 411                     skb_checksum_help(skb))) {
 412                        sch->qstats.drops++;
 413                        return NET_XMIT_DROP;
 414                }
 415
 416                skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
 417        }
 418
 419        cb = netem_skb_cb(skb);
 420        if (q->gap == 0 ||              /* not doing reordering */
 421            q->counter < q->gap - 1 ||  /* inside last reordering gap */
 422            q->reorder < get_crandom(&q->reorder_cor)) {
 423                psched_time_t now;
 424                psched_tdiff_t delay;
 425
 426                delay = tabledist(q->latency, q->jitter,
 427                                  &q->delay_cor, q->delay_dist);
 428
 429                now = psched_get_time();
 430
 431                if (q->rate) {
 432                        struct sk_buff_head *list = &sch->q;
 433
 434                        delay += packet_len_2_sched_time(skb->len, q);
 435
 436                        if (!skb_queue_empty(list)) {
 437                                /*
 438                                 * Last packet in queue is reference point (now).
 439                                 * First packet in queue is already in flight,
 440                                 * calculate this time bonus and substract
 441                                 * from delay.
 442                                 */
 443                                delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
 444                                now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
 445                        }
 446                }
 447
 448                cb->time_to_send = now + delay;
 449                ++q->counter;
 450                ret = tfifo_enqueue(skb, sch);
 451        } else {
 452                /*
 453                 * Do re-ordering by putting one out of N packets at the front
 454                 * of the queue.
 455                 */
 456                cb->time_to_send = psched_get_time();
 457                q->counter = 0;
 458
 459                __skb_queue_head(&sch->q, skb);
 460                sch->qstats.backlog += qdisc_pkt_len(skb);
 461                sch->qstats.requeues++;
 462                ret = NET_XMIT_SUCCESS;
 463        }
 464
 465        if (ret != NET_XMIT_SUCCESS) {
 466                if (net_xmit_drop_count(ret)) {
 467                        sch->qstats.drops++;
 468                        return ret;
 469                }
 470        }
 471
 472        return NET_XMIT_SUCCESS;
 473}
 474
 475static unsigned int netem_drop(struct Qdisc *sch)
 476{
 477        struct netem_sched_data *q = qdisc_priv(sch);
 478        unsigned int len;
 479
 480        len = qdisc_queue_drop(sch);
 481        if (!len && q->qdisc && q->qdisc->ops->drop)
 482            len = q->qdisc->ops->drop(q->qdisc);
 483        if (len)
 484                sch->qstats.drops++;
 485
 486        return len;
 487}
 488
 489static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 490{
 491        struct netem_sched_data *q = qdisc_priv(sch);
 492        struct sk_buff *skb;
 493
 494        if (qdisc_is_throttled(sch))
 495                return NULL;
 496
 497tfifo_dequeue:
 498        skb = qdisc_peek_head(sch);
 499        if (skb) {
 500                const struct netem_skb_cb *cb = netem_skb_cb(skb);
 501
 502                /* if more time remaining? */
 503                if (cb->time_to_send <= psched_get_time()) {
 504                        __skb_unlink(skb, &sch->q);
 505                        sch->qstats.backlog -= qdisc_pkt_len(skb);
 506
 507#ifdef CONFIG_NET_CLS_ACT
 508                        /*
 509                         * If it's at ingress let's pretend the delay is
 510                         * from the network (tstamp will be updated).
 511                         */
 512                        if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
 513                                skb->tstamp.tv64 = 0;
 514#endif
 515
 516                        if (q->qdisc) {
 517                                int err = qdisc_enqueue(skb, q->qdisc);
 518
 519                                if (unlikely(err != NET_XMIT_SUCCESS)) {
 520                                        if (net_xmit_drop_count(err)) {
 521                                                sch->qstats.drops++;
 522                                                qdisc_tree_decrease_qlen(sch, 1);
 523                                        }
 524                                }
 525                                goto tfifo_dequeue;
 526                        }
 527deliver:
 528                        qdisc_unthrottled(sch);
 529                        qdisc_bstats_update(sch, skb);
 530                        return skb;
 531                }
 532
 533                if (q->qdisc) {
 534                        skb = q->qdisc->ops->dequeue(q->qdisc);
 535                        if (skb)
 536                                goto deliver;
 537                }
 538                qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 539        }
 540
 541        if (q->qdisc) {
 542                skb = q->qdisc->ops->dequeue(q->qdisc);
 543                if (skb)
 544                        goto deliver;
 545        }
 546        return NULL;
 547}
 548
 549static void netem_reset(struct Qdisc *sch)
 550{
 551        struct netem_sched_data *q = qdisc_priv(sch);
 552
 553        qdisc_reset_queue(sch);
 554        if (q->qdisc)
 555                qdisc_reset(q->qdisc);
 556        qdisc_watchdog_cancel(&q->watchdog);
 557}
 558
 559static void dist_free(struct disttable *d)
 560{
 561        if (d) {
 562                if (is_vmalloc_addr(d))
 563                        vfree(d);
 564                else
 565                        kfree(d);
 566        }
 567}
 568
 569/*
 570 * Distribution data is a variable size payload containing
 571 * signed 16 bit values.
 572 */
 573static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 574{
 575        struct netem_sched_data *q = qdisc_priv(sch);
 576        size_t n = nla_len(attr)/sizeof(__s16);
 577        const __s16 *data = nla_data(attr);
 578        spinlock_t *root_lock;
 579        struct disttable *d;
 580        int i;
 581        size_t s;
 582
 583        if (n > NETEM_DIST_MAX)
 584                return -EINVAL;
 585
 586        s = sizeof(struct disttable) + n * sizeof(s16);
 587        d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
 588        if (!d)
 589                d = vmalloc(s);
 590        if (!d)
 591                return -ENOMEM;
 592
 593        d->size = n;
 594        for (i = 0; i < n; i++)
 595                d->table[i] = data[i];
 596
 597        root_lock = qdisc_root_sleeping_lock(sch);
 598
 599        spin_lock_bh(root_lock);
 600        swap(q->delay_dist, d);
 601        spin_unlock_bh(root_lock);
 602
 603        dist_free(d);
 604        return 0;
 605}
 606
 607static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 608{
 609        struct netem_sched_data *q = qdisc_priv(sch);
 610        const struct tc_netem_corr *c = nla_data(attr);
 611
 612        init_crandom(&q->delay_cor, c->delay_corr);
 613        init_crandom(&q->loss_cor, c->loss_corr);
 614        init_crandom(&q->dup_cor, c->dup_corr);
 615}
 616
 617static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
 618{
 619        struct netem_sched_data *q = qdisc_priv(sch);
 620        const struct tc_netem_reorder *r = nla_data(attr);
 621
 622        q->reorder = r->probability;
 623        init_crandom(&q->reorder_cor, r->correlation);
 624}
 625
 626static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 627{
 628        struct netem_sched_data *q = qdisc_priv(sch);
 629        const struct tc_netem_corrupt *r = nla_data(attr);
 630
 631        q->corrupt = r->probability;
 632        init_crandom(&q->corrupt_cor, r->correlation);
 633}
 634
 635static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
 636{
 637        struct netem_sched_data *q = qdisc_priv(sch);
 638        const struct tc_netem_rate *r = nla_data(attr);
 639
 640        q->rate = r->rate;
 641        q->packet_overhead = r->packet_overhead;
 642        q->cell_size = r->cell_size;
 643        if (q->cell_size)
 644                q->cell_size_reciprocal = reciprocal_value(q->cell_size);
 645        q->cell_overhead = r->cell_overhead;
 646}
 647
 648static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 649{
 650        struct netem_sched_data *q = qdisc_priv(sch);
 651        const struct nlattr *la;
 652        int rem;
 653
 654        nla_for_each_nested(la, attr, rem) {
 655                u16 type = nla_type(la);
 656
 657                switch(type) {
 658                case NETEM_LOSS_GI: {
 659                        const struct tc_netem_gimodel *gi = nla_data(la);
 660
 661                        if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
 662                                pr_info("netem: incorrect gi model size\n");
 663                                return -EINVAL;
 664                        }
 665
 666                        q->loss_model = CLG_4_STATES;
 667
 668                        q->clg.state = 1;
 669                        q->clg.a1 = gi->p13;
 670                        q->clg.a2 = gi->p31;
 671                        q->clg.a3 = gi->p32;
 672                        q->clg.a4 = gi->p14;
 673                        q->clg.a5 = gi->p23;
 674                        break;
 675                }
 676
 677                case NETEM_LOSS_GE: {
 678                        const struct tc_netem_gemodel *ge = nla_data(la);
 679
 680                        if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
 681                                pr_info("netem: incorrect ge model size\n");
 682                                return -EINVAL;
 683                        }
 684
 685                        q->loss_model = CLG_GILB_ELL;
 686                        q->clg.state = 1;
 687                        q->clg.a1 = ge->p;
 688                        q->clg.a2 = ge->r;
 689                        q->clg.a3 = ge->h;
 690                        q->clg.a4 = ge->k1;
 691                        break;
 692                }
 693
 694                default:
 695                        pr_info("netem: unknown loss type %u\n", type);
 696                        return -EINVAL;
 697                }
 698        }
 699
 700        return 0;
 701}
 702
 703static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 704        [TCA_NETEM_CORR]        = { .len = sizeof(struct tc_netem_corr) },
 705        [TCA_NETEM_REORDER]     = { .len = sizeof(struct tc_netem_reorder) },
 706        [TCA_NETEM_CORRUPT]     = { .len = sizeof(struct tc_netem_corrupt) },
 707        [TCA_NETEM_RATE]        = { .len = sizeof(struct tc_netem_rate) },
 708        [TCA_NETEM_LOSS]        = { .type = NLA_NESTED },
 709};
 710
 711static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
 712                      const struct nla_policy *policy, int len)
 713{
 714        int nested_len = nla_len(nla) - NLA_ALIGN(len);
 715
 716        if (nested_len < 0) {
 717                pr_info("netem: invalid attributes len %d\n", nested_len);
 718                return -EINVAL;
 719        }
 720
 721        if (nested_len >= nla_attr_size(0))
 722                return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
 723                                 nested_len, policy);
 724
 725        memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 726        return 0;
 727}
 728
 729/* Parse netlink message to set options */
 730static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 731{
 732        struct netem_sched_data *q = qdisc_priv(sch);
 733        struct nlattr *tb[TCA_NETEM_MAX + 1];
 734        struct tc_netem_qopt *qopt;
 735        int ret;
 736
 737        if (opt == NULL)
 738                return -EINVAL;
 739
 740        qopt = nla_data(opt);
 741        ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
 742        if (ret < 0)
 743                return ret;
 744
 745        sch->limit = qopt->limit;
 746
 747        q->latency = qopt->latency;
 748        q->jitter = qopt->jitter;
 749        q->limit = qopt->limit;
 750        q->gap = qopt->gap;
 751        q->counter = 0;
 752        q->loss = qopt->loss;
 753        q->duplicate = qopt->duplicate;
 754
 755        /* for compatibility with earlier versions.
 756         * if gap is set, need to assume 100% probability
 757         */
 758        if (q->gap)
 759                q->reorder = ~0;
 760
 761        if (tb[TCA_NETEM_CORR])
 762                get_correlation(sch, tb[TCA_NETEM_CORR]);
 763
 764        if (tb[TCA_NETEM_DELAY_DIST]) {
 765                ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
 766                if (ret)
 767                        return ret;
 768        }
 769
 770        if (tb[TCA_NETEM_REORDER])
 771                get_reorder(sch, tb[TCA_NETEM_REORDER]);
 772
 773        if (tb[TCA_NETEM_CORRUPT])
 774                get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 775
 776        if (tb[TCA_NETEM_RATE])
 777                get_rate(sch, tb[TCA_NETEM_RATE]);
 778
 779        q->loss_model = CLG_RANDOM;
 780        if (tb[TCA_NETEM_LOSS])
 781                ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
 782
 783        return ret;
 784}
 785
 786static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 787{
 788        struct netem_sched_data *q = qdisc_priv(sch);
 789        int ret;
 790
 791        if (!opt)
 792                return -EINVAL;
 793
 794        qdisc_watchdog_init(&q->watchdog, sch);
 795
 796        q->loss_model = CLG_RANDOM;
 797        ret = netem_change(sch, opt);
 798        if (ret)
 799                pr_info("netem: change failed\n");
 800        return ret;
 801}
 802
 803static void netem_destroy(struct Qdisc *sch)
 804{
 805        struct netem_sched_data *q = qdisc_priv(sch);
 806
 807        qdisc_watchdog_cancel(&q->watchdog);
 808        if (q->qdisc)
 809                qdisc_destroy(q->qdisc);
 810        dist_free(q->delay_dist);
 811}
 812
 813static int dump_loss_model(const struct netem_sched_data *q,
 814                           struct sk_buff *skb)
 815{
 816        struct nlattr *nest;
 817
 818        nest = nla_nest_start(skb, TCA_NETEM_LOSS);
 819        if (nest == NULL)
 820                goto nla_put_failure;
 821
 822        switch (q->loss_model) {
 823        case CLG_RANDOM:
 824                /* legacy loss model */
 825                nla_nest_cancel(skb, nest);
 826                return 0;       /* no data */
 827
 828        case CLG_4_STATES: {
 829                struct tc_netem_gimodel gi = {
 830                        .p13 = q->clg.a1,
 831                        .p31 = q->clg.a2,
 832                        .p32 = q->clg.a3,
 833                        .p14 = q->clg.a4,
 834                        .p23 = q->clg.a5,
 835                };
 836
 837                NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
 838                break;
 839        }
 840        case CLG_GILB_ELL: {
 841                struct tc_netem_gemodel ge = {
 842                        .p = q->clg.a1,
 843                        .r = q->clg.a2,
 844                        .h = q->clg.a3,
 845                        .k1 = q->clg.a4,
 846                };
 847
 848                NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
 849                break;
 850        }
 851        }
 852
 853        nla_nest_end(skb, nest);
 854        return 0;
 855
 856nla_put_failure:
 857        nla_nest_cancel(skb, nest);
 858        return -1;
 859}
 860
 861static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 862{
 863        const struct netem_sched_data *q = qdisc_priv(sch);
 864        struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
 865        struct tc_netem_qopt qopt;
 866        struct tc_netem_corr cor;
 867        struct tc_netem_reorder reorder;
 868        struct tc_netem_corrupt corrupt;
 869        struct tc_netem_rate rate;
 870
 871        qopt.latency = q->latency;
 872        qopt.jitter = q->jitter;
 873        qopt.limit = q->limit;
 874        qopt.loss = q->loss;
 875        qopt.gap = q->gap;
 876        qopt.duplicate = q->duplicate;
 877        NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
 878
 879        cor.delay_corr = q->delay_cor.rho;
 880        cor.loss_corr = q->loss_cor.rho;
 881        cor.dup_corr = q->dup_cor.rho;
 882        NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
 883
 884        reorder.probability = q->reorder;
 885        reorder.correlation = q->reorder_cor.rho;
 886        NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
 887
 888        corrupt.probability = q->corrupt;
 889        corrupt.correlation = q->corrupt_cor.rho;
 890        NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 891
 892        rate.rate = q->rate;
 893        rate.packet_overhead = q->packet_overhead;
 894        rate.cell_size = q->cell_size;
 895        rate.cell_overhead = q->cell_overhead;
 896        NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
 897
 898        if (dump_loss_model(q, skb) != 0)
 899                goto nla_put_failure;
 900
 901        return nla_nest_end(skb, nla);
 902
 903nla_put_failure:
 904        nlmsg_trim(skb, nla);
 905        return -1;
 906}
 907
 908static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
 909                          struct sk_buff *skb, struct tcmsg *tcm)
 910{
 911        struct netem_sched_data *q = qdisc_priv(sch);
 912
 913        if (cl != 1 || !q->qdisc)       /* only one class */
 914                return -ENOENT;
 915
 916        tcm->tcm_handle |= TC_H_MIN(1);
 917        tcm->tcm_info = q->qdisc->handle;
 918
 919        return 0;
 920}
 921
 922static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 923                     struct Qdisc **old)
 924{
 925        struct netem_sched_data *q = qdisc_priv(sch);
 926
 927        sch_tree_lock(sch);
 928        *old = q->qdisc;
 929        q->qdisc = new;
 930        if (*old) {
 931                qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 932                qdisc_reset(*old);
 933        }
 934        sch_tree_unlock(sch);
 935
 936        return 0;
 937}
 938
 939static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
 940{
 941        struct netem_sched_data *q = qdisc_priv(sch);
 942        return q->qdisc;
 943}
 944
 945static unsigned long netem_get(struct Qdisc *sch, u32 classid)
 946{
 947        return 1;
 948}
 949
 950static void netem_put(struct Qdisc *sch, unsigned long arg)
 951{
 952}
 953
 954static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 955{
 956        if (!walker->stop) {
 957                if (walker->count >= walker->skip)
 958                        if (walker->fn(sch, 1, walker) < 0) {
 959                                walker->stop = 1;
 960                                return;
 961                        }
 962                walker->count++;
 963        }
 964}
 965
 966static const struct Qdisc_class_ops netem_class_ops = {
 967        .graft          =       netem_graft,
 968        .leaf           =       netem_leaf,
 969        .get            =       netem_get,
 970        .put            =       netem_put,
 971        .walk           =       netem_walk,
 972        .dump           =       netem_dump_class,
 973};
 974
 975static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 976        .id             =       "netem",
 977        .cl_ops         =       &netem_class_ops,
 978        .priv_size      =       sizeof(struct netem_sched_data),
 979        .enqueue        =       netem_enqueue,
 980        .dequeue        =       netem_dequeue,
 981        .peek           =       qdisc_peek_dequeued,
 982        .drop           =       netem_drop,
 983        .init           =       netem_init,
 984        .reset          =       netem_reset,
 985        .destroy        =       netem_destroy,
 986        .change         =       netem_change,
 987        .dump           =       netem_dump,
 988        .owner          =       THIS_MODULE,
 989};
 990
 991
 992static int __init netem_module_init(void)
 993{
 994        pr_info("netem: version " VERSION "\n");
 995        return register_qdisc(&netem_qdisc_ops);
 996}
 997static void __exit netem_module_exit(void)
 998{
 999        unregister_qdisc(&netem_qdisc_ops);
1000}
1001module_init(netem_module_init)
1002module_exit(netem_module_exit)
1003MODULE_LICENSE("GPL");
1004
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.