linux/net/core/drop_monitor.c
<<
>>
Prefs
   1/*
   2 * Monitoring code for network dropped packet alerts
   3 *
   4 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
   5 */
   6
   7#include <linux/netdevice.h>
   8#include <linux/etherdevice.h>
   9#include <linux/string.h>
  10#include <linux/if_arp.h>
  11#include <linux/inetdevice.h>
  12#include <linux/inet.h>
  13#include <linux/interrupt.h>
  14#include <linux/netpoll.h>
  15#include <linux/sched.h>
  16#include <linux/delay.h>
  17#include <linux/types.h>
  18#include <linux/workqueue.h>
  19#include <linux/netlink.h>
  20#include <linux/net_dropmon.h>
  21#include <linux/percpu.h>
  22#include <linux/timer.h>
  23#include <linux/bitops.h>
  24#include <linux/slab.h>
  25#include <net/genetlink.h>
  26#include <net/netevent.h>
  27
  28#include <trace/events/skb.h>
  29#include <trace/events/napi.h>
  30
  31#include <asm/unaligned.h>
  32
  33#define TRACE_ON 1
  34#define TRACE_OFF 0
  35
  36static void send_dm_alert(struct work_struct *unused);
  37
  38
  39/*
  40 * Globals, our netlink socket pointer
  41 * and the work handle that will send up
  42 * netlink alerts
  43 */
  44static int trace_state = TRACE_OFF;
  45static DEFINE_SPINLOCK(trace_state_lock);
  46
  47struct per_cpu_dm_data {
  48        struct work_struct dm_alert_work;
  49        struct sk_buff *skb;
  50        atomic_t dm_hit_count;
  51        struct timer_list send_timer;
  52};
  53
  54struct dm_hw_stat_delta {
  55        struct net_device *dev;
  56        unsigned long last_rx;
  57        struct list_head list;
  58        struct rcu_head rcu;
  59        unsigned long last_drop_val;
  60};
  61
  62static struct genl_family net_drop_monitor_family = {
  63        .id             = GENL_ID_GENERATE,
  64        .hdrsize        = 0,
  65        .name           = "NET_DM",
  66        .version        = 2,
  67        .maxattr        = NET_DM_CMD_MAX,
  68};
  69
  70static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
  71
  72static int dm_hit_limit = 64;
  73static int dm_delay = 1;
  74static unsigned long dm_hw_check_delta = 2*HZ;
  75static LIST_HEAD(hw_stats_list);
  76
  77static void reset_per_cpu_data(struct per_cpu_dm_data *data)
  78{
  79        size_t al;
  80        struct net_dm_alert_msg *msg;
  81        struct nlattr *nla;
  82
  83        al = sizeof(struct net_dm_alert_msg);
  84        al += dm_hit_limit * sizeof(struct net_dm_drop_point);
  85        al += sizeof(struct nlattr);
  86
  87        data->skb = genlmsg_new(al, GFP_KERNEL);
  88        genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
  89                        0, NET_DM_CMD_ALERT);
  90        nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
  91        msg = nla_data(nla);
  92        memset(msg, 0, al);
  93        atomic_set(&data->dm_hit_count, dm_hit_limit);
  94}
  95
  96static void send_dm_alert(struct work_struct *unused)
  97{
  98        struct sk_buff *skb;
  99        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
 100
 101        /*
 102         * Grab the skb we're about to send
 103         */
 104        skb = data->skb;
 105
 106        /*
 107         * Replace it with a new one
 108         */
 109        reset_per_cpu_data(data);
 110
 111        /*
 112         * Ship it!
 113         */
 114        genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
 115
 116}
 117
 118/*
 119 * This is the timer function to delay the sending of an alert
 120 * in the event that more drops will arrive during the
 121 * hysteresis period.  Note that it operates under the timer interrupt
 122 * so we don't need to disable preemption here
 123 */
 124static void sched_send_work(unsigned long unused)
 125{
 126        struct per_cpu_dm_data *data =  &__get_cpu_var(dm_cpu_data);
 127
 128        schedule_work(&data->dm_alert_work);
 129}
 130
 131static void trace_drop_common(struct sk_buff *skb, void *location)
 132{
 133        struct net_dm_alert_msg *msg;
 134        struct nlmsghdr *nlh;
 135        struct nlattr *nla;
 136        int i;
 137        struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
 138
 139
 140        if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
 141                /*
 142                 * we're already at zero, discard this hit
 143                 */
 144                goto out;
 145        }
 146
 147        nlh = (struct nlmsghdr *)data->skb->data;
 148        nla = genlmsg_data(nlmsg_data(nlh));
 149        msg = nla_data(nla);
 150        for (i = 0; i < msg->entries; i++) {
 151                if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
 152                        msg->points[i].count++;
 153                        goto out;
 154                }
 155        }
 156
 157        /*
 158         * We need to create a new entry
 159         */
 160        __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
 161        nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
 162        memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
 163        msg->points[msg->entries].count = 1;
 164        msg->entries++;
 165
 166        if (!timer_pending(&data->send_timer)) {
 167                data->send_timer.expires = jiffies + dm_delay * HZ;
 168                add_timer_on(&data->send_timer, smp_processor_id());
 169        }
 170
 171out:
 172        return;
 173}
 174
 175static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
 176{
 177        trace_drop_common(skb, location);
 178}
 179
 180static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
 181{
 182        struct dm_hw_stat_delta *new_stat;
 183
 184        /*
 185         * Don't check napi structures with no associated device
 186         */
 187        if (!napi->dev)
 188                return;
 189
 190        rcu_read_lock();
 191        list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
 192                /*
 193                 * only add a note to our monitor buffer if:
 194                 * 1) this is the dev we received on
 195                 * 2) its after the last_rx delta
 196                 * 3) our rx_dropped count has gone up
 197                 */
 198                if ((new_stat->dev == napi->dev)  &&
 199                    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
 200                    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
 201                        trace_drop_common(NULL, NULL);
 202                        new_stat->last_drop_val = napi->dev->stats.rx_dropped;
 203                        new_stat->last_rx = jiffies;
 204                        break;
 205                }
 206        }
 207        rcu_read_unlock();
 208}
 209
 210static int set_all_monitor_traces(int state)
 211{
 212        int rc = 0;
 213        struct dm_hw_stat_delta *new_stat = NULL;
 214        struct dm_hw_stat_delta *temp;
 215
 216        spin_lock(&trace_state_lock);
 217
 218        if (state == trace_state) {
 219                rc = -EAGAIN;
 220                goto out_unlock;
 221        }
 222
 223        switch (state) {
 224        case TRACE_ON:
 225                rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
 226                rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
 227                break;
 228        case TRACE_OFF:
 229                rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
 230                rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
 231
 232                tracepoint_synchronize_unregister();
 233
 234                /*
 235                 * Clean the device list
 236                 */
 237                list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
 238                        if (new_stat->dev == NULL) {
 239                                list_del_rcu(&new_stat->list);
 240                                kfree_rcu(new_stat, rcu);
 241                        }
 242                }
 243                break;
 244        default:
 245                rc = 1;
 246                break;
 247        }
 248
 249        if (!rc)
 250                trace_state = state;
 251        else
 252                rc = -EINPROGRESS;
 253
 254out_unlock:
 255        spin_unlock(&trace_state_lock);
 256
 257        return rc;
 258}
 259
 260
 261static int net_dm_cmd_config(struct sk_buff *skb,
 262                        struct genl_info *info)
 263{
 264        return -ENOTSUPP;
 265}
 266
 267static int net_dm_cmd_trace(struct sk_buff *skb,
 268                        struct genl_info *info)
 269{
 270        switch (info->genlhdr->cmd) {
 271        case NET_DM_CMD_START:
 272                return set_all_monitor_traces(TRACE_ON);
 273                break;
 274        case NET_DM_CMD_STOP:
 275                return set_all_monitor_traces(TRACE_OFF);
 276                break;
 277        }
 278
 279        return -ENOTSUPP;
 280}
 281
 282static int dropmon_net_event(struct notifier_block *ev_block,
 283                        unsigned long event, void *ptr)
 284{
 285        struct net_device *dev = ptr;
 286        struct dm_hw_stat_delta *new_stat = NULL;
 287        struct dm_hw_stat_delta *tmp;
 288
 289        switch (event) {
 290        case NETDEV_REGISTER:
 291                new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
 292
 293                if (!new_stat)
 294                        goto out;
 295
 296                new_stat->dev = dev;
 297                new_stat->last_rx = jiffies;
 298                spin_lock(&trace_state_lock);
 299                list_add_rcu(&new_stat->list, &hw_stats_list);
 300                spin_unlock(&trace_state_lock);
 301                break;
 302        case NETDEV_UNREGISTER:
 303                spin_lock(&trace_state_lock);
 304                list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
 305                        if (new_stat->dev == dev) {
 306                                new_stat->dev = NULL;
 307                                if (trace_state == TRACE_OFF) {
 308                                        list_del_rcu(&new_stat->list);
 309                                        kfree_rcu(new_stat, rcu);
 310                                        break;
 311                                }
 312                        }
 313                }
 314                spin_unlock(&trace_state_lock);
 315                break;
 316        }
 317out:
 318        return NOTIFY_DONE;
 319}
 320
 321static struct genl_ops dropmon_ops[] = {
 322        {
 323                .cmd = NET_DM_CMD_CONFIG,
 324                .doit = net_dm_cmd_config,
 325        },
 326        {
 327                .cmd = NET_DM_CMD_START,
 328                .doit = net_dm_cmd_trace,
 329        },
 330        {
 331                .cmd = NET_DM_CMD_STOP,
 332                .doit = net_dm_cmd_trace,
 333        },
 334};
 335
 336static struct notifier_block dropmon_net_notifier = {
 337        .notifier_call = dropmon_net_event
 338};
 339
 340static int __init init_net_drop_monitor(void)
 341{
 342        struct per_cpu_dm_data *data;
 343        int cpu, rc;
 344
 345        printk(KERN_INFO "Initializing network drop monitor service\n");
 346
 347        if (sizeof(void *) > 8) {
 348                printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
 349                return -ENOSPC;
 350        }
 351
 352        rc = genl_register_family_with_ops(&net_drop_monitor_family,
 353                                           dropmon_ops,
 354                                           ARRAY_SIZE(dropmon_ops));
 355        if (rc) {
 356                printk(KERN_ERR "Could not create drop monitor netlink family\n");
 357                return rc;
 358        }
 359
 360        rc = register_netdevice_notifier(&dropmon_net_notifier);
 361        if (rc < 0) {
 362                printk(KERN_CRIT "Failed to register netdevice notifier\n");
 363                goto out_unreg;
 364        }
 365
 366        rc = 0;
 367
 368        for_each_present_cpu(cpu) {
 369                data = &per_cpu(dm_cpu_data, cpu);
 370                reset_per_cpu_data(data);
 371                INIT_WORK(&data->dm_alert_work, send_dm_alert);
 372                init_timer(&data->send_timer);
 373                data->send_timer.data = cpu;
 374                data->send_timer.function = sched_send_work;
 375        }
 376
 377        goto out;
 378
 379out_unreg:
 380        genl_unregister_family(&net_drop_monitor_family);
 381out:
 382        return rc;
 383}
 384
 385late_initcall(init_net_drop_monitor);
 386
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.