linux/net/core/gen_estimator.c
/*
 * net/core/gen_estimator.c	Simple rate estimator.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *		Jamal Hadi Salim - moved it to net/core and reshuffled
 *		names to make it usable in general net subsystem.
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rbtree.h>
#include <net/sock.h>
#include <net/gen_stats.h>

/*
   This code is NOT intended to be used for statistics collection,
   its purpose is to provide a base for statistical multiplexing
   for controlled load service.
   If you need only statistics, run a user level daemon which
   periodically reads byte counters.

   Unfortunately, rate estimation is not a very easy task.
   F.e. I did not find a simple way to estimate the current peak rate
   and even failed to formulate the problem 8)8)

   So I preferred not to build an estimator into the scheduler,
   but to run this task separately.
   Ideally, it should be kernel thread(s), but for now it runs
   from timers, which puts an apparent upper bound on the number of rated
   flows, has minimal overhead for small numbers of flows, but is enough
   to handle controlled load service and sets of aggregates.

   We measure rate over A=(1<<interval) seconds and evaluate EWMA:

   avrate = avrate*(1-W) + rate*W

   where W is chosen as a negative power of 2: W = 2^(-ewma_log)

   The resulting time constant is:

   T = A/(-ln(1-W))


   NOTES.

   * avbps is scaled by 2^5, avpps is scaled by 2^10.
   * both values are reported as 32 bit unsigned values. bps can
     overflow for fast links : max speed being 34360Mbit/sec
   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
     for HZ=100 and HZ=1024 8)), maximal interval
     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
     are too expensive, longer ones can be implemented
     at user level painlessly.
 */
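
/*
 * Worked example of the parameters above (illustrative arithmetic only):
 * with interval = 0 the measurement period is A = 1 sec (the timer fires
 * every (HZ/4) << 2 jiffies); ewma_log = 1 gives W = 1/2 and a time
 * constant T = 1/-ln(1/2) ~= 1.4 sec, while ewma_log = 2 gives W = 1/4
 * and T ~= 3.5 sec.  With interval = 3 (A = 8 sec) and ewma_log = 5
 * (W = 1/32), T = 8/-ln(31/32) ~= 252 sec.  As noted above, bps is a
 * 32 bit byte counter per second and overflows beyond
 * 2^32 bytes/sec ~= 34360 Mbit/sec.
 */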

#define EST_MAX_INTERVAL	5

struct gen_estimator
{
	struct list_head	list;
	struct gnet_stats_basic	*bstats;
	struct gnet_stats_rate_est	*rate_est;
	spinlock_t		*stats_lock;
	int			ewma_log;
	u64			last_bytes;
	u64			avbps;
	u32			last_packets;
	u32			avpps;
	struct rcu_head		e_rcu;
	struct rb_node		node;
};

struct gen_estimator_head
{
	struct timer_list	timer;
	struct list_head	list;
};

static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];

/* Protects against NULL dereference */
static DEFINE_RWLOCK(est_lock);

/* Protects against soft lockup during large deletion */
static struct rb_root est_root = RB_ROOT;

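/*
 * est_timer() runs every (HZ/4) << idx jiffies, i.e. over a period of
 * 2^(idx-2) seconds.  Converting the byte delta accumulated over that
 * period to bytes/sec scaled by 2^5 therefore multiplies by
 * 2^(2-idx) * 2^5, which is the (7 - idx) shift below; the packet rate
 * uses the 2^10 scaling and hence a (12 - idx) shift.  The update
 * avrate += (rate - avrate) >> ewma_log is the EWMA
 * avrate = avrate*(1-W) + rate*W with W = 2^(-ewma_log).
 */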
static void est_timer(unsigned long arg)
{
	int idx = (int)arg;
	struct gen_estimator *e;

	rcu_read_lock();
	list_for_each_entry_rcu(e, &elist[idx].list, list) {
		u64 nbytes;
		u64 brate;
		u32 npackets;
		u32 rate;

		spin_lock(e->stats_lock);
		read_lock(&est_lock);
		if (e->bstats == NULL)
			goto skip;

		nbytes = e->bstats->bytes;
		npackets = e->bstats->packets;
		brate = (nbytes - e->last_bytes)<<(7 - idx);
		e->last_bytes = nbytes;
		e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log;
		e->rate_est->bps = (e->avbps+0xF)>>5;

		rate = (npackets - e->last_packets)<<(12 - idx);
		e->last_packets = npackets;
		e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
		e->rate_est->pps = (e->avpps+0x1FF)>>10;
skip:
		read_unlock(&est_lock);
		spin_unlock(e->stats_lock);
	}

	if (!list_empty(&elist[idx].list))
		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
	rcu_read_unlock();
}

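/*
 * est_root orders estimators by their bstats pointer only; entries that
 * share a bstats pointer end up in the left subtree, so gen_find_node()
 * also has to match rate_est before returning a hit, and
 * gen_kill_estimator() below loops until no estimator registered for the
 * (bstats, rate_est) pair remains.
 */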
static void gen_add_node(struct gen_estimator *est)
{
	struct rb_node **p = &est_root.rb_node, *parent = NULL;

	while (*p) {
		struct gen_estimator *e;

		parent = *p;
		e = rb_entry(parent, struct gen_estimator, node);

		if (est->bstats > e->bstats)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&est->node, parent, p);
	rb_insert_color(&est->node, &est_root);
}

static
struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats,
				    const struct gnet_stats_rate_est *rate_est)
{
	struct rb_node *p = est_root.rb_node;

	while (p) {
		struct gen_estimator *e;

		e = rb_entry(p, struct gen_estimator, node);

		if (bstats > e->bstats)
			p = p->rb_right;
		else if (bstats < e->bstats || rate_est != e->rate_est)
			p = p->rb_left;
		else
			return e;
	}
	return NULL;
}

/**
 * gen_new_estimator - create a new rate estimator
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
 * @opt: rate estimator configuration TLV
 *
 * Creates a new rate estimator with &bstats as source and &rate_est
 * as destination. A new timer with the interval specified in the
 * configuration TLV is created. Upon each interval, the latest statistics
 * will be read from &bstats and the estimated rate will be stored in
 * &rate_est with the statistics lock grabbed during this period.
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE: Called under rtnl_mutex
 */
int gen_new_estimator(struct gnet_stats_basic *bstats,
		      struct gnet_stats_rate_est *rate_est,
		      spinlock_t *stats_lock,
		      struct nlattr *opt)
{
	struct gen_estimator *est;
	struct gnet_estimator *parm = nla_data(opt);
	int idx;

	if (nla_len(opt) < sizeof(*parm))
		return -EINVAL;

	if (parm->interval < -2 || parm->interval > 3)
		return -EINVAL;

	est = kzalloc(sizeof(*est), GFP_KERNEL);
	if (est == NULL)
		return -ENOBUFS;

	idx = parm->interval + 2;
	est->bstats = bstats;
	est->rate_est = rate_est;
	est->stats_lock = stats_lock;
	est->ewma_log = parm->ewma_log;
	est->last_bytes = bstats->bytes;
	est->avbps = rate_est->bps<<5;
	est->last_packets = bstats->packets;
	est->avpps = rate_est->pps<<10;

	if (!elist[idx].timer.function) {
		INIT_LIST_HEAD(&elist[idx].list);
		setup_timer(&elist[idx].timer, est_timer, idx);
	}

	if (list_empty(&elist[idx].list))
		mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));

	list_add_rcu(&est->list, &elist[idx].list);
	gen_add_node(est);

	return 0;
}
EXPORT_SYMBOL(gen_new_estimator);
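
/*
 * Usage sketch (illustrative, not part of this file): a queueing discipline
 * that wants a rate estimate typically wires its own counters in roughly
 * like this, with tca[TCA_RATE] carrying a struct gnet_estimator
 * (interval, ewma_log) from user space:
 *
 *	err = gen_new_estimator(&sch->bstats, &sch->rate_est,
 *				qdisc_lock(sch), tca[TCA_RATE]);
 *	...
 *	gen_kill_estimator(&sch->bstats, &sch->rate_est);
 *
 * The lock helper shown here is only an assumption; whatever spinlock the
 * caller passes must be the one that protects its bstats counters.
 */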

static void __gen_kill_estimator(struct rcu_head *head)
{
	struct gen_estimator *e = container_of(head,
					struct gen_estimator, e_rcu);
	kfree(e);
}
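
/*
 * Teardown is two-staged: gen_kill_estimator() below first clears
 * e->bstats under est_lock so that a concurrently running est_timer()
 * skips the entry, then unlinks it with list_del_rcu() and lets
 * call_rcu() defer the kfree() in __gen_kill_estimator() until all RCU
 * readers walking the list have finished.
 */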

/**
 * gen_kill_estimator - remove a rate estimator
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 *
 * Removes the rate estimator specified by &bstats and &rate_est.
 *
 * NOTE: Called under rtnl_mutex
 */
void gen_kill_estimator(struct gnet_stats_basic *bstats,
			struct gnet_stats_rate_est *rate_est)
{
	struct gen_estimator *e;

	while ((e = gen_find_node(bstats, rate_est))) {
		rb_erase(&e->node, &est_root);

		write_lock_bh(&est_lock);
		e->bstats = NULL;
		write_unlock_bh(&est_lock);

		list_del_rcu(&e->list);
		call_rcu(&e->e_rcu, __gen_kill_estimator);
	}
}
EXPORT_SYMBOL(gen_kill_estimator);

/**
 * gen_replace_estimator - replace rate estimator configuration
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 * @stats_lock: statistics lock
 * @opt: rate estimator configuration TLV
 *
 * Replaces the configuration of a rate estimator by calling
 * gen_kill_estimator() and gen_new_estimator().
 *
 * Returns 0 on success or a negative error code.
 */
int gen_replace_estimator(struct gnet_stats_basic *bstats,
			  struct gnet_stats_rate_est *rate_est,
			  spinlock_t *stats_lock, struct nlattr *opt)
{
	gen_kill_estimator(bstats, rate_est);
	return gen_new_estimator(bstats, rate_est, stats_lock, opt);
}
EXPORT_SYMBOL(gen_replace_estimator);

/**
 * gen_estimator_active - test if estimator is currently in use
 * @bstats: basic statistics
 * @rate_est: rate estimator statistics
 *
 * Returns true if estimator is active, and false if not.
 */
bool gen_estimator_active(const struct gnet_stats_basic *bstats,
			  const struct gnet_stats_rate_est *rate_est)
{
	ASSERT_RTNL();

	return gen_find_node(bstats, rate_est) != NULL;
}
EXPORT_SYMBOL(gen_estimator_active);