linux-bk/net/core/gen_estimator.c
<<
>>
Prefs
   1/*
   2 * net/sched/gen_estimator.c    Simple rate estimator.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 * Changes:
  12 *              Jamal Hadi Salim - moved it to net/core and reshulfed
  13 *              names to make it usable in general net subsystem.
  14 */
  15
  16#include <asm/uaccess.h>
  17#include <asm/system.h>
  18#include <asm/bitops.h>
  19#include <linux/module.h>
  20#include <linux/types.h>
  21#include <linux/kernel.h>
  22#include <linux/jiffies.h>
  23#include <linux/string.h>
  24#include <linux/mm.h>
  25#include <linux/socket.h>
  26#include <linux/sockios.h>
  27#include <linux/in.h>
  28#include <linux/errno.h>
  29#include <linux/interrupt.h>
  30#include <linux/netdevice.h>
  31#include <linux/skbuff.h>
  32#include <linux/rtnetlink.h>
  33#include <linux/init.h>
  34#include <net/sock.h>
  35#include <net/gen_stats.h>
  36
  37/*
  38   This code is NOT intended to be used for statistics collection,
  39   its purpose is to provide a base for statistical multiplexing
  40   for controlled load service.
  41   If you need only statistics, run a user level daemon which
  42   periodically reads byte counters.
  43
  44   Unfortunately, rate estimation is not a very easy task.
  45   F.e. I did not find a simple way to estimate the current peak rate
  46   and even failed to formulate the problem 8)8)
  47
  48   So I preferred not to built an estimator into the scheduler,
  49   but run this task separately.
  50   Ideally, it should be kernel thread(s), but for now it runs
  51   from timers, which puts apparent top bounds on the number of rated
  52   flows, has minimal overhead on small, but is enough
  53   to handle controlled load service, sets of aggregates.
  54
  55   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
  56
  57   avrate = avrate*(1-W) + rate*W
  58
  59   where W is chosen as negative power of 2: W = 2^(-ewma_log)
  60
  61   The resulting time constant is:
  62
  63   T = A/(-ln(1-W))
  64
  65
  66   NOTES.
  67
  68   * The stored value for avbps is scaled by 2^5, so that maximal
  69     rate is ~1Gbit, avpps is scaled by 2^10.
  70
  71   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
  72     for HZ=100 and HZ=1024 8)), maximal interval
  73     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
  74     are too expensive, longer ones can be implemented
  75     at user level painlessly.
  76 */
  77
  78#define EST_MAX_INTERVAL        5
  79
  80struct gen_estimator
  81{
  82        struct gen_estimator    *next;
  83        struct gnet_stats_basic *bstats;
  84        struct gnet_stats_rate_est      *rate_est;
  85        spinlock_t              *stats_lock;
  86        unsigned                interval;
  87        int                     ewma_log;
  88        u64                     last_bytes;
  89        u32                     last_packets;
  90        u32                     avpps;
  91        u32                     avbps;
  92};
  93
  94struct gen_estimator_head
  95{
  96        struct timer_list       timer;
  97        struct gen_estimator    *list;
  98};
  99
 100static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
 101
 102/* Estimator array lock */
 103static DEFINE_RWLOCK(est_lock);
 104
 105static void est_timer(unsigned long arg)
 106{
 107        int idx = (int)arg;
 108        struct gen_estimator *e;
 109
 110        read_lock(&est_lock);
 111        for (e = elist[idx].list; e; e = e->next) {
 112                u64 nbytes;
 113                u32 npackets;
 114                u32 rate;
 115
 116                spin_lock(e->stats_lock);
 117                nbytes = e->bstats->bytes;
 118                npackets = e->bstats->packets;
 119                rate = (nbytes - e->last_bytes)<<(7 - idx);
 120                e->last_bytes = nbytes;
 121                e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
 122                e->rate_est->bps = (e->avbps+0xF)>>5;
 123
 124                rate = (npackets - e->last_packets)<<(12 - idx);
 125                e->last_packets = npackets;
 126                e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
 127                e->rate_est->pps = (e->avpps+0x1FF)>>10;
 128                spin_unlock(e->stats_lock);
 129        }
 130
 131        mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
 132        read_unlock(&est_lock);
 133}
 134
 135/**
 136 * gen_new_estimator - create a new rate estimator
 137 * @bstats: basic statistics
 138 * @rate_est: rate estimator statistics
 139 * @stats_lock: statistics lock
 140 * @opt: rate estimator configuration TLV
 141 *
 142 * Creates a new rate estimator with &bstats as source and &rate_est
 143 * as destination. A new timer with the interval specified in the
 144 * configuration TLV is created. Upon each interval, the latest statistics
 145 * will be read from &bstats and the estimated rate will be stored in
 146 * &rate_est with the statistics lock grabed during this period.
 147 * 
 148 * Returns 0 on success or a negative error code.
 149 */
 150int gen_new_estimator(struct gnet_stats_basic *bstats,
 151        struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct rtattr *opt)
 152{
 153        struct gen_estimator *est;
 154        struct gnet_estimator *parm = RTA_DATA(opt);
 155
 156        if (RTA_PAYLOAD(opt) < sizeof(*parm))
 157                return -EINVAL;
 158
 159        if (parm->interval < -2 || parm->interval > 3)
 160                return -EINVAL;
 161
 162        est = kmalloc(sizeof(*est), GFP_KERNEL);
 163        if (est == NULL)
 164                return -ENOBUFS;
 165
 166        memset(est, 0, sizeof(*est));
 167        est->interval = parm->interval + 2;
 168        est->bstats = bstats;
 169        est->rate_est = rate_est;
 170        est->stats_lock = stats_lock;
 171        est->ewma_log = parm->ewma_log;
 172        est->last_bytes = bstats->bytes;
 173        est->avbps = rate_est->bps<<5;
 174        est->last_packets = bstats->packets;
 175        est->avpps = rate_est->pps<<10;
 176
 177        est->next = elist[est->interval].list;
 178        if (est->next == NULL) {
 179                init_timer(&elist[est->interval].timer);
 180                elist[est->interval].timer.data = est->interval;
 181                elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
 182                elist[est->interval].timer.function = est_timer;
 183                add_timer(&elist[est->interval].timer);
 184        }
 185        write_lock_bh(&est_lock);
 186        elist[est->interval].list = est;
 187        write_unlock_bh(&est_lock);
 188        return 0;
 189}
 190
 191/**
 192 * gen_kill_estimator - remove a rate estimator
 193 * @bstats: basic statistics
 194 * @rate_est: rate estimator statistics
 195 *
 196 * Removes the rate estimator specified by &bstats and &rate_est
 197 * and deletes the timer.
 198 */
 199void gen_kill_estimator(struct gnet_stats_basic *bstats,
 200        struct gnet_stats_rate_est *rate_est)
 201{
 202        int idx;
 203        struct gen_estimator *est, **pest;
 204
 205        for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
 206                int killed = 0;
 207                pest = &elist[idx].list;
 208                while ((est=*pest) != NULL) {
 209                        if (est->rate_est != rate_est || est->bstats != bstats) {
 210                                pest = &est->next;
 211                                continue;
 212                        }
 213
 214                        write_lock_bh(&est_lock);
 215                        *pest = est->next;
 216                        write_unlock_bh(&est_lock);
 217
 218                        kfree(est);
 219                        killed++;
 220                }
 221                if (killed && elist[idx].list == NULL)
 222                        del_timer(&elist[idx].timer);
 223        }
 224}
 225
 226/**
 227 * gen_replace_estimator - replace rate estimator configruation
 228 * @bstats: basic statistics
 229 * @rate_est: rate estimator statistics
 230 * @stats_lock: statistics lock
 231 * @opt: rate estimator configuration TLV
 232 *
 233 * Replaces the configuration of a rate estimator by calling
 234 * gen_kill_estimator() and gen_new_estimator().
 235 * 
 236 * Returns 0 on success or a negative error code.
 237 */
 238int
 239gen_replace_estimator(struct gnet_stats_basic *bstats,
 240        struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock,
 241        struct rtattr *opt)
 242{
 243    gen_kill_estimator(bstats, rate_est);
 244    return gen_new_estimator(bstats, rate_est, stats_lock, opt);
 245}
 246    
 247
 248EXPORT_SYMBOL(gen_kill_estimator);
 249EXPORT_SYMBOL(gen_new_estimator);
 250EXPORT_SYMBOL(gen_replace_estimator);
 251
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.