linux/kernel/tracepoint.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2008 Mathieu Desnoyers
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 */
  18#include <linux/module.h>
  19#include <linux/mutex.h>
  20#include <linux/types.h>
  21#include <linux/jhash.h>
  22#include <linux/list.h>
  23#include <linux/rcupdate.h>
  24#include <linux/tracepoint.h>
  25#include <linux/err.h>
  26#include <linux/slab.h>
  27#include <linux/sched.h>
  28
/*
 * Start and end of the core kernel tracepoint array, as used by
 * tracepoint_update_probes() and tracepoint_get_iter() below.
 * NOTE(review): presumably defined by the linker script -- confirm.
 */
extern struct tracepoint __start___tracepoints[];
extern struct tracepoint __stop___tracepoints[];

/*
 * Set to 1 to enable tracepoint debug output. It has no initializer, so it
 * is zero and debug_print_probes() returns without printing anything.
 */
static const int tracepoint_debug;

/*
 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
 * builtin and module tracepoints and the hash table.
 */
static DEFINE_MUTEX(tracepoints_mutex);

/*
 * Tracepoint hash table, containing the active tracepoints.
 * Protected by tracepoints_mutex.
 * A bucket is selected by masking a jhash of the tracepoint name with
 * TRACEPOINT_TABLE_SIZE - 1, i.e. there are 1 << 6 = 64 buckets.
 */
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
  48
/*
 * Note about RCU :
 * It is used to delay the freeing of a replaced probes array until a
 * quiescent state is reached.
 * Tracepoint entry modifications are protected by the tracepoints_mutex.
 *
 * One entry exists per tracepoint name that currently has probes attached;
 * it is allocated by add_tracepoint() with room for the name behind it.
 */
struct tracepoint_entry {
	struct hlist_node hlist;	/* Link in a tracepoint_table bucket. */
	struct tracepoint_func *funcs;	/* Probe array ended by a NULL func, or NULL. */
	int refcount;	/* Number of times armed. 0 if disarmed. */
	char name[0];	/* NUL-terminated tracepoint name. */
};
  61
/*
 * Allocation header placed in front of every probes array. allocate_probes()
 * hands out a pointer to probes[]; container_of() recovers the header when
 * the array is retired.
 */
struct tp_probes {
	union {
		struct rcu_head rcu;	/* Passed to call_rcu_sched() to free the array. */
		struct list_head list;	/* Link while queued on old_probes. */
	} u;
	struct tracepoint_func probes[0];	/* The probe array itself. */
};
  69
  70static inline void *allocate_probes(int count)
  71{
  72        struct tp_probes *p  = kmalloc(count * sizeof(struct tracepoint_func)
  73                        + sizeof(struct tp_probes), GFP_KERNEL);
  74        return p == NULL ? NULL : p->probes;
  75}
  76
  77static void rcu_free_old_probes(struct rcu_head *head)
  78{
  79        kfree(container_of(head, struct tp_probes, u.rcu));
  80}
  81
  82static inline void release_probes(struct tracepoint_func *old)
  83{
  84        if (old) {
  85                struct tp_probes *tp_probes = container_of(old,
  86                        struct tp_probes, probes[0]);
  87                call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
  88        }
  89}
  90
  91static void debug_print_probes(struct tracepoint_entry *entry)
  92{
  93        int i;
  94
  95        if (!tracepoint_debug || !entry->funcs)
  96                return;
  97
  98        for (i = 0; entry->funcs[i].func; i++)
  99                printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func);
 100}
 101
 102static struct tracepoint_func *
 103tracepoint_entry_add_probe(struct tracepoint_entry *entry,
 104                           void *probe, void *data)
 105{
 106        int nr_probes = 0;
 107        struct tracepoint_func *old, *new;
 108
 109        WARN_ON(!probe);
 110
 111        debug_print_probes(entry);
 112        old = entry->funcs;
 113        if (old) {
 114                /* (N -> N+1), (N != 0, 1) probes */
 115                for (nr_probes = 0; old[nr_probes].func; nr_probes++)
 116                        if (old[nr_probes].func == probe &&
 117                            old[nr_probes].data == data)
 118                                return ERR_PTR(-EEXIST);
 119        }
 120        /* + 2 : one for new probe, one for NULL func */
 121        new = allocate_probes(nr_probes + 2);
 122        if (new == NULL)
 123                return ERR_PTR(-ENOMEM);
 124        if (old)
 125                memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
 126        new[nr_probes].func = probe;
 127        new[nr_probes].data = data;
 128        new[nr_probes + 1].func = NULL;
 129        entry->refcount = nr_probes + 1;
 130        entry->funcs = new;
 131        debug_print_probes(entry);
 132        return old;
 133}
 134
 135static void *
 136tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
 137                              void *probe, void *data)
 138{
 139        int nr_probes = 0, nr_del = 0, i;
 140        struct tracepoint_func *old, *new;
 141
 142        old = entry->funcs;
 143
 144        if (!old)
 145                return ERR_PTR(-ENOENT);
 146
 147        debug_print_probes(entry);
 148        /* (N -> M), (N > 1, M >= 0) probes */
 149        for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
 150                if (!probe ||
 151                    (old[nr_probes].func == probe &&
 152                     old[nr_probes].data == data))
 153                        nr_del++;
 154        }
 155
 156        if (nr_probes - nr_del == 0) {
 157                /* N -> 0, (N > 1) */
 158                entry->funcs = NULL;
 159                entry->refcount = 0;
 160                debug_print_probes(entry);
 161                return old;
 162        } else {
 163                int j = 0;
 164                /* N -> M, (N > 1, M > 0) */
 165                /* + 1 for NULL */
 166                new = allocate_probes(nr_probes - nr_del + 1);
 167                if (new == NULL)
 168                        return ERR_PTR(-ENOMEM);
 169                for (i = 0; old[i].func; i++)
 170                        if (probe &&
 171                            (old[i].func != probe || old[i].data != data))
 172                                new[j++] = old[i];
 173                new[nr_probes - nr_del].func = NULL;
 174                entry->refcount = nr_probes - nr_del;
 175                entry->funcs = new;
 176        }
 177        debug_print_probes(entry);
 178        return old;
 179}
 180
 181/*
 182 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
 183 * Must be called with tracepoints_mutex held.
 184 * Returns NULL if not present.
 185 */
 186static struct tracepoint_entry *get_tracepoint(const char *name)
 187{
 188        struct hlist_head *head;
 189        struct hlist_node *node;
 190        struct tracepoint_entry *e;
 191        u32 hash = jhash(name, strlen(name), 0);
 192
 193        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 194        hlist_for_each_entry(e, node, head, hlist) {
 195                if (!strcmp(name, e->name))
 196                        return e;
 197        }
 198        return NULL;
 199}
 200
 201/*
 202 * Add the tracepoint to the tracepoint hash table. Must be called with
 203 * tracepoints_mutex held.
 204 */
 205static struct tracepoint_entry *add_tracepoint(const char *name)
 206{
 207        struct hlist_head *head;
 208        struct hlist_node *node;
 209        struct tracepoint_entry *e;
 210        size_t name_len = strlen(name) + 1;
 211        u32 hash = jhash(name, name_len-1, 0);
 212
 213        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 214        hlist_for_each_entry(e, node, head, hlist) {
 215                if (!strcmp(name, e->name)) {
 216                        printk(KERN_NOTICE
 217                                "tracepoint %s busy\n", name);
 218                        return ERR_PTR(-EEXIST);        /* Already there */
 219                }
 220        }
 221        /*
 222         * Using kmalloc here to allocate a variable length element. Could
 223         * cause some memory fragmentation if overused.
 224         */
 225        e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
 226        if (!e)
 227                return ERR_PTR(-ENOMEM);
 228        memcpy(&e->name[0], name, name_len);
 229        e->funcs = NULL;
 230        e->refcount = 0;
 231        hlist_add_head(&e->hlist, head);
 232        return e;
 233}
 234
/*
 * Remove the tracepoint entry from the tracepoint hash table and free it.
 * Must be called with tracepoints_mutex held.
 * Only the entry itself is freed; e->funcs is not touched here.
 */
static inline void remove_tracepoint(struct tracepoint_entry *e)
{
	hlist_del(&e->hlist);
	kfree(e);
}
 244
 245/*
 246 * Sets the probe callback corresponding to one tracepoint.
 247 */
 248static void set_tracepoint(struct tracepoint_entry **entry,
 249        struct tracepoint *elem, int active)
 250{
 251        WARN_ON(strcmp((*entry)->name, elem->name) != 0);
 252
 253        if (elem->regfunc && !elem->state && active)
 254                elem->regfunc();
 255        else if (elem->unregfunc && elem->state && !active)
 256                elem->unregfunc();
 257
 258        /*
 259         * rcu_assign_pointer has a smp_wmb() which makes sure that the new
 260         * probe callbacks array is consistent before setting a pointer to it.
 261         * This array is referenced by __DO_TRACE from
 262         * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
 263         * is used.
 264         */
 265        rcu_assign_pointer(elem->funcs, (*entry)->funcs);
 266        elem->state = active;
 267}
 268
 269/*
 270 * Disable a tracepoint and its probe callback.
 271 * Note: only waiting an RCU period after setting elem->call to the empty
 272 * function insures that the original callback is not used anymore. This insured
 273 * by preempt_disable around the call site.
 274 */
 275static void disable_tracepoint(struct tracepoint *elem)
 276{
 277        if (elem->unregfunc && elem->state)
 278                elem->unregfunc();
 279
 280        elem->state = 0;
 281        rcu_assign_pointer(elem->funcs, NULL);
 282}
 283
 284/**
 285 * tracepoint_update_probe_range - Update a probe range
 286 * @begin: beginning of the range
 287 * @end: end of the range
 288 *
 289 * Updates the probe callback corresponding to a range of tracepoints.
 290 */
 291void
 292tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
 293{
 294        struct tracepoint *iter;
 295        struct tracepoint_entry *mark_entry;
 296
 297        if (!begin)
 298                return;
 299
 300        mutex_lock(&tracepoints_mutex);
 301        for (iter = begin; iter < end; iter++) {
 302                mark_entry = get_tracepoint(iter->name);
 303                if (mark_entry) {
 304                        set_tracepoint(&mark_entry, iter,
 305                                        !!mark_entry->refcount);
 306                } else {
 307                        disable_tracepoint(iter);
 308                }
 309        }
 310        mutex_unlock(&tracepoints_mutex);
 311}
 312
/*
 * Update probes, removing the faulty probes.
 *
 * Re-synchronizes the tracepoint sites with the hash table: first the core
 * kernel range, then the tracepoints in modules. The core range update takes
 * tracepoints_mutex itself, so the callers in this file drop the mutex
 * before calling this.
 */
static void tracepoint_update_probes(void)
{
	/* Core kernel tracepoints */
	tracepoint_update_probe_range(__start___tracepoints,
		__stop___tracepoints);
	/* tracepoints in modules. */
	module_update_tracepoints();
}
 324
 325static struct tracepoint_func *
 326tracepoint_add_probe(const char *name, void *probe, void *data)
 327{
 328        struct tracepoint_entry *entry;
 329        struct tracepoint_func *old;
 330
 331        entry = get_tracepoint(name);
 332        if (!entry) {
 333                entry = add_tracepoint(name);
 334                if (IS_ERR(entry))
 335                        return (struct tracepoint_func *)entry;
 336        }
 337        old = tracepoint_entry_add_probe(entry, probe, data);
 338        if (IS_ERR(old) && !entry->refcount)
 339                remove_tracepoint(entry);
 340        return old;
 341}
 342
 343/**
 344 * tracepoint_probe_register -  Connect a probe to a tracepoint
 345 * @name: tracepoint name
 346 * @probe: probe handler
 347 *
 348 * Returns 0 if ok, error value on error.
 349 * The probe address must at least be aligned on the architecture pointer size.
 350 */
 351int tracepoint_probe_register(const char *name, void *probe, void *data)
 352{
 353        struct tracepoint_func *old;
 354
 355        mutex_lock(&tracepoints_mutex);
 356        old = tracepoint_add_probe(name, probe, data);
 357        mutex_unlock(&tracepoints_mutex);
 358        if (IS_ERR(old))
 359                return PTR_ERR(old);
 360
 361        tracepoint_update_probes();             /* may update entry */
 362        release_probes(old);
 363        return 0;
 364}
 365EXPORT_SYMBOL_GPL(tracepoint_probe_register);
 366
 367static struct tracepoint_func *
 368tracepoint_remove_probe(const char *name, void *probe, void *data)
 369{
 370        struct tracepoint_entry *entry;
 371        struct tracepoint_func *old;
 372
 373        entry = get_tracepoint(name);
 374        if (!entry)
 375                return ERR_PTR(-ENOENT);
 376        old = tracepoint_entry_remove_probe(entry, probe, data);
 377        if (IS_ERR(old))
 378                return old;
 379        if (!entry->refcount)
 380                remove_tracepoint(entry);
 381        return old;
 382}
 383
 384/**
 385 * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
 386 * @name: tracepoint name
 387 * @probe: probe function pointer
 388 *
 389 * We do not need to call a synchronize_sched to make sure the probes have
 390 * finished running before doing a module unload, because the module unload
 391 * itself uses stop_machine(), which insures that every preempt disabled section
 392 * have finished.
 393 */
 394int tracepoint_probe_unregister(const char *name, void *probe, void *data)
 395{
 396        struct tracepoint_func *old;
 397
 398        mutex_lock(&tracepoints_mutex);
 399        old = tracepoint_remove_probe(name, probe, data);
 400        mutex_unlock(&tracepoints_mutex);
 401        if (IS_ERR(old))
 402                return PTR_ERR(old);
 403
 404        tracepoint_update_probes();             /* may update entry */
 405        release_probes(old);
 406        return 0;
 407}
 408EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 409
 410static LIST_HEAD(old_probes);
 411static int need_update;
 412
 413static void tracepoint_add_old_probes(void *old)
 414{
 415        need_update = 1;
 416        if (old) {
 417                struct tp_probes *tp_probes = container_of(old,
 418                        struct tp_probes, probes[0]);
 419                list_add(&tp_probes->u.list, &old_probes);
 420        }
 421}
 422
 423/**
 424 * tracepoint_probe_register_noupdate -  register a probe but not connect
 425 * @name: tracepoint name
 426 * @probe: probe handler
 427 *
 428 * caller must call tracepoint_probe_update_all()
 429 */
 430int tracepoint_probe_register_noupdate(const char *name, void *probe,
 431                                       void *data)
 432{
 433        struct tracepoint_func *old;
 434
 435        mutex_lock(&tracepoints_mutex);
 436        old = tracepoint_add_probe(name, probe, data);
 437        if (IS_ERR(old)) {
 438                mutex_unlock(&tracepoints_mutex);
 439                return PTR_ERR(old);
 440        }
 441        tracepoint_add_old_probes(old);
 442        mutex_unlock(&tracepoints_mutex);
 443        return 0;
 444}
 445EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
 446
 447/**
 448 * tracepoint_probe_unregister_noupdate -  remove a probe but not disconnect
 449 * @name: tracepoint name
 450 * @probe: probe function pointer
 451 *
 452 * caller must call tracepoint_probe_update_all()
 453 */
 454int tracepoint_probe_unregister_noupdate(const char *name, void *probe,
 455                                         void *data)
 456{
 457        struct tracepoint_func *old;
 458
 459        mutex_lock(&tracepoints_mutex);
 460        old = tracepoint_remove_probe(name, probe, data);
 461        if (IS_ERR(old)) {
 462                mutex_unlock(&tracepoints_mutex);
 463                return PTR_ERR(old);
 464        }
 465        tracepoint_add_old_probes(old);
 466        mutex_unlock(&tracepoints_mutex);
 467        return 0;
 468}
 469EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
 470
 471/**
 472 * tracepoint_probe_update_all -  update tracepoints
 473 */
 474void tracepoint_probe_update_all(void)
 475{
 476        LIST_HEAD(release_probes);
 477        struct tp_probes *pos, *next;
 478
 479        mutex_lock(&tracepoints_mutex);
 480        if (!need_update) {
 481                mutex_unlock(&tracepoints_mutex);
 482                return;
 483        }
 484        if (!list_empty(&old_probes))
 485                list_replace_init(&old_probes, &release_probes);
 486        need_update = 0;
 487        mutex_unlock(&tracepoints_mutex);
 488
 489        tracepoint_update_probes();
 490        list_for_each_entry_safe(pos, next, &release_probes, u.list) {
 491                list_del(&pos->u.list);
 492                call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
 493        }
 494}
 495EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
 496
 497/**
 498 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 499 * @tracepoint: current tracepoints (in), next tracepoint (out)
 500 * @begin: beginning of the range
 501 * @end: end of the range
 502 *
 503 * Returns whether a next tracepoint has been found (1) or not (0).
 504 * Will return the first tracepoint in the range if the input tracepoint is
 505 * NULL.
 506 */
 507int tracepoint_get_iter_range(struct tracepoint **tracepoint,
 508        struct tracepoint *begin, struct tracepoint *end)
 509{
 510        if (!*tracepoint && begin != end) {
 511                *tracepoint = begin;
 512                return 1;
 513        }
 514        if (*tracepoint >= begin && *tracepoint < end)
 515                return 1;
 516        return 0;
 517}
 518EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
 519
 520static void tracepoint_get_iter(struct tracepoint_iter *iter)
 521{
 522        int found = 0;
 523
 524        /* Core kernel tracepoints */
 525        if (!iter->module) {
 526                found = tracepoint_get_iter_range(&iter->tracepoint,
 527                                __start___tracepoints, __stop___tracepoints);
 528                if (found)
 529                        goto end;
 530        }
 531        /* tracepoints in modules. */
 532        found = module_get_iter_tracepoints(iter);
 533end:
 534        if (!found)
 535                tracepoint_iter_reset(iter);
 536}
 537
/*
 * Begin (or resume) an iteration: position @iter on a valid tracepoint, or
 * reset it if none is found.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
 543
/*
 * Advance @iter to the next tracepoint; the iterator is reset when there is
 * no further tracepoint.
 */
void tracepoint_iter_next(struct tracepoint_iter *iter)
{
	iter->tracepoint++;
	/*
	 * iter->tracepoint may be invalid because we blindly incremented it.
	 * Make sure it is valid by marshalling on the tracepoints, getting the
	 * tracepoints from following modules if necessary.
	 */
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_next);
 555
/*
 * End an iteration. Currently a no-op: the body is empty, so nothing is
 * released or modified.
 */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
 560
/*
 * Put @iter back in its initial state (no module, no tracepoint), so that
 * the next tracepoint_iter_start() begins with the core kernel tracepoints.
 */
void tracepoint_iter_reset(struct tracepoint_iter *iter)
{
	iter->module = NULL;
	iter->tracepoint = NULL;
}
EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
 567
 568#ifdef CONFIG_MODULES
 569
 570int tracepoint_module_notify(struct notifier_block *self,
 571                             unsigned long val, void *data)
 572{
 573        struct module *mod = data;
 574
 575        switch (val) {
 576        case MODULE_STATE_COMING:
 577        case MODULE_STATE_GOING:
 578                tracepoint_update_probe_range(mod->tracepoints,
 579                        mod->tracepoints + mod->num_tracepoints);
 580                break;
 581        }
 582        return 0;
 583}
 584
/*
 * Notifier block that routes module state changes to
 * tracepoint_module_notify(); registered by init_tracepoints().
 */
struct notifier_block tracepoint_module_nb = {
	.notifier_call = tracepoint_module_notify,
	.priority = 0,
};
 589
/*
 * Initcall: hook tracepoint_module_nb into the module notifier chain.
 * Returns whatever register_module_notifier() returns.
 */
static int init_tracepoints(void)
{
	return register_module_notifier(&tracepoint_module_nb);
}
__initcall(init_tracepoints);
 595
 596#endif /* CONFIG_MODULES */
 597
 598#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 599
 600/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 601static int sys_tracepoint_refcount;
 602
 603void syscall_regfunc(void)
 604{
 605        unsigned long flags;
 606        struct task_struct *g, *t;
 607
 608        if (!sys_tracepoint_refcount) {
 609                read_lock_irqsave(&tasklist_lock, flags);
 610                do_each_thread(g, t) {
 611                        /* Skip kernel threads. */
 612                        if (t->mm)
 613                                set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 614                } while_each_thread(g, t);
 615                read_unlock_irqrestore(&tasklist_lock, flags);
 616        }
 617        sys_tracepoint_refcount++;
 618}
 619
 620void syscall_unregfunc(void)
 621{
 622        unsigned long flags;
 623        struct task_struct *g, *t;
 624
 625        sys_tracepoint_refcount--;
 626        if (!sys_tracepoint_refcount) {
 627                read_lock_irqsave(&tasklist_lock, flags);
 628                do_each_thread(g, t) {
 629                        clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 630                } while_each_thread(g, t);
 631                read_unlock_irqrestore(&tasklist_lock, flags);
 632        }
 633}
 634#endif
 635
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.