linux/kernel/tracepoint.c
/*
 * Copyright (C) 2008 Mathieu Desnoyers
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>

extern struct tracepoint __start___tracepoints[];
extern struct tracepoint __stop___tracepoints[];

/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

/*
 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
 * builtin and module tracepoints and the hash table.
 */
static DEFINE_MUTEX(tracepoints_mutex);

/*
 * Tracepoint hash table, containing the active tracepoints.
 * Protected by tracepoints_mutex.
 */
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)

/*
 * Note about RCU:
 * RCU is used to delay freeing of the old multi-probe arrays until a
 * quiescent state is reached.
 * Modifications of tracepoint entries are protected by tracepoints_mutex.
 */
struct tracepoint_entry {
        struct hlist_node hlist;
        void **funcs;
        int refcount;   /* Number of times armed. 0 if disarmed. */
        struct rcu_head rcu;
        void *oldptr;
        unsigned char rcu_pending:1;
        char name[0];
};

static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];

static void free_old_closure(struct rcu_head *head)
{
        struct tracepoint_entry *entry = container_of(head,
                struct tracepoint_entry, rcu);
        kfree(entry->oldptr);
        /* Make sure we free the data before setting the pending flag to 0 */
        smp_wmb();
        entry->rcu_pending = 0;
}

static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
{
        if (!old)
                return;
        entry->oldptr = old;
        entry->rcu_pending = 1;
        /* write rcu_pending before calling the RCU callback */
        smp_wmb();
        call_rcu_sched(&entry->rcu, free_old_closure);
}
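
/*
 * Rough sketch of the update/free lifecycle (illustrative only): an updater
 * that swaps entry->funcs while holding tracepoints_mutex hands the previous
 * array to tracepoint_entry_free_old():
 *
 *      old = tracepoint_entry_add_probe(entry, probe);  (or _remove_probe)
 *      ... the new array is published with rcu_assign_pointer() from
 *          set_tracepoint(), via tracepoint_update_probes() ...
 *      tracepoint_entry_free_old(entry, old);
 *
 * kfree(old) then only runs from free_old_closure() once call_rcu_sched()
 * reports a grace period, i.e. once every preempt-disabled user of the old
 * array has finished. rcu_pending lets a later updater call
 * rcu_barrier_sched() before entry->rcu is reused for another callback.
 */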

static void debug_print_probes(struct tracepoint_entry *entry)
{
        int i;

        if (!tracepoint_debug)
                return;

        for (i = 0; entry->funcs[i]; i++)
                printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
}

static void *
tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
{
        int nr_probes = 0;
        void **old, **new;

        WARN_ON(!probe);

        debug_print_probes(entry);
        old = entry->funcs;
        if (old) {
                /* (N -> N+1), (N != 0, 1) probes */
                for (nr_probes = 0; old[nr_probes]; nr_probes++)
                        if (old[nr_probes] == probe)
                                return ERR_PTR(-EEXIST);
        }
        /* + 2 : one for new probe, one for NULL func */
        new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
        if (new == NULL)
                return ERR_PTR(-ENOMEM);
        if (old)
                memcpy(new, old, nr_probes * sizeof(void *));
        new[nr_probes] = probe;
        entry->refcount = nr_probes + 1;
        entry->funcs = new;
        debug_print_probes(entry);
        return old;
}
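
/*
 * Illustrative example (hypothetical probe names): with f1 and f2 already
 * registered, entry->funcs goes from { f1, f2, NULL } to { f1, f2, f3, NULL }
 * when f3 is added. The old three-slot array is returned so the caller can
 * pass it to tracepoint_entry_free_old() once the new array has been
 * published by tracepoint_update_probes().
 */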

static void *
tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
{
        int nr_probes = 0, nr_del = 0, i;
        void **old, **new;

        old = entry->funcs;

        if (!old)
                return NULL;

        debug_print_probes(entry);
        /* (N -> M), (N > 1, M >= 0) probes */
        for (nr_probes = 0; old[nr_probes]; nr_probes++) {
                if ((!probe || old[nr_probes] == probe))
                        nr_del++;
        }

        if (nr_probes - nr_del == 0) {
                /* N -> 0, (N > 1) */
                entry->funcs = NULL;
                entry->refcount = 0;
                debug_print_probes(entry);
                return old;
        } else {
                int j = 0;
                /* N -> M, (N > 1, M > 0) */
                /* + 1 for NULL */
                new = kzalloc((nr_probes - nr_del + 1)
                        * sizeof(void *), GFP_KERNEL);
                if (new == NULL)
                        return ERR_PTR(-ENOMEM);
                for (i = 0; old[i]; i++)
                        if ((probe && old[i] != probe))
                                new[j++] = old[i];
                entry->refcount = nr_probes - nr_del;
                entry->funcs = new;
        }
        debug_print_probes(entry);
        return old;
}

/*
 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
 * Must be called with tracepoints_mutex held.
 * Returns NULL if not present.
 */
static struct tracepoint_entry *get_tracepoint(const char *name)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct tracepoint_entry *e;
        u32 hash = jhash(name, strlen(name), 0);

        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
        hlist_for_each_entry(e, node, head, hlist) {
                if (!strcmp(name, e->name))
                        return e;
        }
        return NULL;
}

/*
 * Add the tracepoint to the tracepoint hash table. Must be called with
 * tracepoints_mutex held.
 */
static struct tracepoint_entry *add_tracepoint(const char *name)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct tracepoint_entry *e;
        size_t name_len = strlen(name) + 1;
        u32 hash = jhash(name, name_len-1, 0);

        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
        hlist_for_each_entry(e, node, head, hlist) {
                if (!strcmp(name, e->name)) {
                        printk(KERN_NOTICE
                                "tracepoint %s busy\n", name);
                        return ERR_PTR(-EEXIST);        /* Already there */
                }
        }
        /*
         * Using kmalloc here to allocate a variable length element. Could
         * cause some memory fragmentation if overused.
         */
        e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
        if (!e)
                return ERR_PTR(-ENOMEM);
        memcpy(&e->name[0], name, name_len);
        e->funcs = NULL;
        e->refcount = 0;
        e->rcu_pending = 0;
        hlist_add_head(&e->hlist, head);
        return e;
}

/*
 * Remove the tracepoint from the tracepoint hash table. Must be called with
 * tracepoints_mutex held.
 */
static int remove_tracepoint(const char *name)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct tracepoint_entry *e;
        int found = 0;
        size_t len = strlen(name) + 1;
        u32 hash = jhash(name, len-1, 0);

        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
        hlist_for_each_entry(e, node, head, hlist) {
                if (!strcmp(name, e->name)) {
                        found = 1;
                        break;
                }
        }
        if (!found)
                return -ENOENT;
        if (e->refcount)
                return -EBUSY;
        hlist_del(&e->hlist);
        /* Make sure the call_rcu_sched has been executed */
        if (e->rcu_pending)
                rcu_barrier_sched();
        kfree(e);
        return 0;
}

/*
 * Sets the probe callback corresponding to one tracepoint.
 */
static void set_tracepoint(struct tracepoint_entry **entry,
        struct tracepoint *elem, int active)
{
        WARN_ON(strcmp((*entry)->name, elem->name) != 0);

        /*
         * rcu_assign_pointer has a smp_wmb() which makes sure that the new
         * probe callbacks array is consistent before setting a pointer to it.
         * This array is referenced by __DO_TRACE from
         * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
         * is used on the read side.
         */
        rcu_assign_pointer(elem->funcs, (*entry)->funcs);
        elem->state = active;
}
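
/*
 * For reference, the read side that consumes elem->funcs lives in
 * __DO_TRACE() in include/linux/tracepoint.h. In outline (simplified here;
 * see the header for the exact macro) each armed call site does roughly:
 *
 *      void **it_func;
 *
 *      preempt_disable();
 *      it_func = rcu_dereference(tp->funcs);
 *      if (it_func) {
 *              do {
 *                      ((void (*)(proto))(*it_func))(args);
 *              } while (*(++it_func));
 *      }
 *      preempt_enable();
 *
 * rcu_dereference() supplies the smp_read_barrier_depends() mentioned above,
 * and the preempt-disabled region is what call_rcu_sched() waits for.
 */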

/*
 * Disable a tracepoint and its probe callback.
 * Note: only waiting for an RCU-sched grace period after setting elem->state
 * to 0 ensures that the original callback is not used anymore. This is
 * guaranteed by the preempt_disable around the call site.
 */
static void disable_tracepoint(struct tracepoint *elem)
{
        elem->state = 0;
}

/**
 * tracepoint_update_probe_range - Update a probe range
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Updates the probe callback corresponding to a range of tracepoints.
 */
void tracepoint_update_probe_range(struct tracepoint *begin,
        struct tracepoint *end)
{
        struct tracepoint *iter;
        struct tracepoint_entry *mark_entry;

        mutex_lock(&tracepoints_mutex);
        for (iter = begin; iter < end; iter++) {
                mark_entry = get_tracepoint(iter->name);
                if (mark_entry) {
                        set_tracepoint(&mark_entry, iter,
                                        !!mark_entry->refcount);
                } else {
                        disable_tracepoint(iter);
                }
        }
        mutex_unlock(&tracepoints_mutex);
}

/*
 * Update probes, removing the faulty probes.
 */
static void tracepoint_update_probes(void)
{
        /* Core kernel tracepoints */
        tracepoint_update_probe_range(__start___tracepoints,
                __stop___tracepoints);
        /* tracepoints in modules. */
        module_update_tracepoints();
}

/**
 * tracepoint_probe_register -  Connect a probe to a tracepoint
 * @name: tracepoint name
 * @probe: probe handler
 *
 * Returns 0 if ok, error value on error.
 * The probe address must at least be aligned on the architecture pointer size.
 */
int tracepoint_probe_register(const char *name, void *probe)
{
        struct tracepoint_entry *entry;
        int ret = 0;
        void *old;

        mutex_lock(&tracepoints_mutex);
        entry = get_tracepoint(name);
        if (!entry) {
                entry = add_tracepoint(name);
                if (IS_ERR(entry)) {
                        ret = PTR_ERR(entry);
                        goto end;
                }
        }
        /*
         * If we detect that a call_rcu_sched is pending for this tracepoint,
         * make sure it's executed now.
         */
        if (entry->rcu_pending)
                rcu_barrier_sched();
        old = tracepoint_entry_add_probe(entry, probe);
        if (IS_ERR(old)) {
                ret = PTR_ERR(old);
                goto end;
        }
        mutex_unlock(&tracepoints_mutex);
        tracepoint_update_probes();             /* may update entry */
        mutex_lock(&tracepoints_mutex);
        entry = get_tracepoint(name);
        WARN_ON(!entry);
        if (entry->rcu_pending)
                rcu_barrier_sched();
        tracepoint_entry_free_old(entry, old);
end:
        mutex_unlock(&tracepoints_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
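
/*
 * Usage sketch (illustrative only: "subsys_event", its prototype and
 * probe_subsys_event are hypothetical and not defined in this file).
 * A tracepoint declared elsewhere with DECLARE_TRACE() can be hooked from,
 * e.g., a module init path:
 *
 *      static void probe_subsys_event(int cpu, unsigned long flags)
 *      {
 *              printk(KERN_INFO "subsys_event: cpu %d flags %lu\n",
 *                     cpu, flags);
 *      }
 *
 *      static int __init tp_example_init(void)
 *      {
 *              return tracepoint_probe_register("subsys_event",
 *                                               (void *)probe_subsys_event);
 *      }
 *
 * The probe prototype must match the tracepoint's prototype exactly; the
 * cast performed when the tracepoint fires does no type checking.
 */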

/**
 * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
 * @name: tracepoint name
 * @probe: probe function pointer
 *
 * We do not need to call a synchronize_sched to make sure the probes have
 * finished running before doing a module unload, because the module unload
 * itself uses stop_machine(), which ensures that every preempt-disabled
 * section has finished.
 */
int tracepoint_probe_unregister(const char *name, void *probe)
{
        struct tracepoint_entry *entry;
        void *old;
        int ret = -ENOENT;

        mutex_lock(&tracepoints_mutex);
        entry = get_tracepoint(name);
        if (!entry)
                goto end;
        if (entry->rcu_pending)
                rcu_barrier_sched();
        old = tracepoint_entry_remove_probe(entry, probe);
        if (!old) {
                printk(KERN_WARNING "Warning: Trying to unregister a probe "
                                    "that doesn't exist\n");
                goto end;
        }
        mutex_unlock(&tracepoints_mutex);
        tracepoint_update_probes();             /* may update entry */
        mutex_lock(&tracepoints_mutex);
        entry = get_tracepoint(name);
        if (!entry)
                goto end;
        if (entry->rcu_pending)
                rcu_barrier_sched();
        tracepoint_entry_free_old(entry, old);
        remove_tracepoint(name);        /* Ignore busy error message */
        ret = 0;
end:
        mutex_unlock(&tracepoints_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
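
/*
 * Continuing the hypothetical example above, the matching teardown in a
 * module exit path would be:
 *
 *      static void __exit tp_example_exit(void)
 *      {
 *              tracepoint_probe_unregister("subsys_event",
 *                                          (void *)probe_subsys_event);
 *      }
 *
 * As the comment above notes, no extra synchronize_sched() is required
 * before the module text disappears, because module unload itself goes
 * through stop_machine().
 */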

/**
 * tracepoint_get_iter_range - Get the next tracepoint in a given range
 * @tracepoint: current tracepoint (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
        struct tracepoint *begin, struct tracepoint *end)
{
        if (!*tracepoint && begin != end) {
                *tracepoint = begin;
                return 1;
        }
        if (*tracepoint >= begin && *tracepoint < end)
                return 1;
        return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);

static void tracepoint_get_iter(struct tracepoint_iter *iter)
{
        int found = 0;

        /* Core kernel tracepoints */
        if (!iter->module) {
                found = tracepoint_get_iter_range(&iter->tracepoint,
                                __start___tracepoints, __stop___tracepoints);
                if (found)
                        goto end;
        }
        /* tracepoints in modules. */
        found = module_get_iter_tracepoints(iter);
end:
        if (!found)
                tracepoint_iter_reset(iter);
}

void tracepoint_iter_start(struct tracepoint_iter *iter)
{
        tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);

void tracepoint_iter_next(struct tracepoint_iter *iter)
{
        iter->tracepoint++;
        /*
         * iter->tracepoint may be invalid because we blindly incremented it.
         * Make sure it is valid by checking it against the tracepoint ranges,
         * moving on to the following modules if necessary.
         */
        tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_next);

void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);

void tracepoint_iter_reset(struct tracepoint_iter *iter)
{
        iter->module = NULL;
        iter->tracepoint = NULL;
}
EXPORT_SYMBOL_GPL(tracepoint_iter_reset);

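/*
 * Iterator usage sketch (illustrative; the listing loop below is not part of
 * this file). A struct tracepoint_iter is typically walked from a seq_file
 * or debugging code like this:
 *
 *      struct tracepoint_iter iter;
 *
 *      tracepoint_iter_reset(&iter);
 *      tracepoint_iter_start(&iter);
 *      while (iter.tracepoint) {
 *              printk(KERN_INFO "tracepoint: %s\n", iter.tracepoint->name);
 *              tracepoint_iter_next(&iter);
 *      }
 *      tracepoint_iter_stop(&iter);
 *
 * tracepoint_get_iter() resets the iterator once it runs past the last
 * module's tracepoint range, so the loop ends with iter.tracepoint == NULL.
 */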