linux/kernel/tracepoint.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2008 Mathieu Desnoyers
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 */
  18#include <linux/module.h>
  19#include <linux/mutex.h>
  20#include <linux/types.h>
  21#include <linux/jhash.h>
  22#include <linux/list.h>
  23#include <linux/rcupdate.h>
  24#include <linux/tracepoint.h>
  25#include <linux/err.h>
  26#include <linux/slab.h>
  27#include <linux/sched.h>
  28#include <linux/jump_label.h>
  29
  30extern struct tracepoint * const __start___tracepoints_ptrs[];
  31extern struct tracepoint * const __stop___tracepoints_ptrs[];
  32
  33/* Set to 1 to enable tracepoint debug output */
  34static const int tracepoint_debug;
  35
  36/*
  37 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
  38 * builtin and module tracepoints and the hash table.
  39 */
  40static DEFINE_MUTEX(tracepoints_mutex);
  41
  42/*
  43 * Tracepoint hash table, containing the active tracepoints.
  44 * Protected by tracepoints_mutex.
  45 */
  46#define TRACEPOINT_HASH_BITS 6
  47#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
  48static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
  49
  50/*
  51 * Note about RCU :
  52 * It is used to delay the free of multiple probes array until a quiescent
  53 * state is reached.
  54 * Tracepoint entries modifications are protected by the tracepoints_mutex.
  55 */
  56struct tracepoint_entry {
  57        struct hlist_node hlist;
  58        struct tracepoint_func *funcs;
  59        int refcount;   /* Number of times armed. 0 if disarmed. */
  60        char name[0];
  61};
  62
  63struct tp_probes {
  64        union {
  65                struct rcu_head rcu;
  66                struct list_head list;
  67        } u;
  68        struct tracepoint_func probes[0];
  69};
  70
  71static inline void *allocate_probes(int count)
  72{
  73        struct tp_probes *p  = kmalloc(count * sizeof(struct tracepoint_func)
  74                        + sizeof(struct tp_probes), GFP_KERNEL);
  75        return p == NULL ? NULL : p->probes;
  76}
  77
  78static void rcu_free_old_probes(struct rcu_head *head)
  79{
  80        kfree(container_of(head, struct tp_probes, u.rcu));
  81}
  82
  83static inline void release_probes(struct tracepoint_func *old)
  84{
  85        if (old) {
  86                struct tp_probes *tp_probes = container_of(old,
  87                        struct tp_probes, probes[0]);
  88                call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
  89        }
  90}
  91
  92static void debug_print_probes(struct tracepoint_entry *entry)
  93{
  94        int i;
  95
  96        if (!tracepoint_debug || !entry->funcs)
  97                return;
  98
  99        for (i = 0; entry->funcs[i].func; i++)
 100                printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func);
 101}
 102
 103static struct tracepoint_func *
 104tracepoint_entry_add_probe(struct tracepoint_entry *entry,
 105                           void *probe, void *data)
 106{
 107        int nr_probes = 0;
 108        struct tracepoint_func *old, *new;
 109
 110        WARN_ON(!probe);
 111
 112        debug_print_probes(entry);
 113        old = entry->funcs;
 114        if (old) {
 115                /* (N -> N+1), (N != 0, 1) probes */
 116                for (nr_probes = 0; old[nr_probes].func; nr_probes++)
 117                        if (old[nr_probes].func == probe &&
 118                            old[nr_probes].data == data)
 119                                return ERR_PTR(-EEXIST);
 120        }
 121        /* + 2 : one for new probe, one for NULL func */
 122        new = allocate_probes(nr_probes + 2);
 123        if (new == NULL)
 124                return ERR_PTR(-ENOMEM);
 125        if (old)
 126                memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
 127        new[nr_probes].func = probe;
 128        new[nr_probes].data = data;
 129        new[nr_probes + 1].func = NULL;
 130        entry->refcount = nr_probes + 1;
 131        entry->funcs = new;
 132        debug_print_probes(entry);
 133        return old;
 134}
 135
 136static void *
 137tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
 138                              void *probe, void *data)
 139{
 140        int nr_probes = 0, nr_del = 0, i;
 141        struct tracepoint_func *old, *new;
 142
 143        old = entry->funcs;
 144
 145        if (!old)
 146                return ERR_PTR(-ENOENT);
 147
 148        debug_print_probes(entry);
 149        /* (N -> M), (N > 1, M >= 0) probes */
 150        for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
 151                if (!probe ||
 152                    (old[nr_probes].func == probe &&
 153                     old[nr_probes].data == data))
 154                        nr_del++;
 155        }
 156
 157        if (nr_probes - nr_del == 0) {
 158                /* N -> 0, (N > 1) */
 159                entry->funcs = NULL;
 160                entry->refcount = 0;
 161                debug_print_probes(entry);
 162                return old;
 163        } else {
 164                int j = 0;
 165                /* N -> M, (N > 1, M > 0) */
 166                /* + 1 for NULL */
 167                new = allocate_probes(nr_probes - nr_del + 1);
 168                if (new == NULL)
 169                        return ERR_PTR(-ENOMEM);
 170                for (i = 0; old[i].func; i++)
 171                        if (probe &&
 172                            (old[i].func != probe || old[i].data != data))
 173                                new[j++] = old[i];
 174                new[nr_probes - nr_del].func = NULL;
 175                entry->refcount = nr_probes - nr_del;
 176                entry->funcs = new;
 177        }
 178        debug_print_probes(entry);
 179        return old;
 180}
 181
 182/*
 183 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
 184 * Must be called with tracepoints_mutex held.
 185 * Returns NULL if not present.
 186 */
 187static struct tracepoint_entry *get_tracepoint(const char *name)
 188{
 189        struct hlist_head *head;
 190        struct hlist_node *node;
 191        struct tracepoint_entry *e;
 192        u32 hash = jhash(name, strlen(name), 0);
 193
 194        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 195        hlist_for_each_entry(e, node, head, hlist) {
 196                if (!strcmp(name, e->name))
 197                        return e;
 198        }
 199        return NULL;
 200}
 201
 202/*
 203 * Add the tracepoint to the tracepoint hash table. Must be called with
 204 * tracepoints_mutex held.
 205 */
 206static struct tracepoint_entry *add_tracepoint(const char *name)
 207{
 208        struct hlist_head *head;
 209        struct hlist_node *node;
 210        struct tracepoint_entry *e;
 211        size_t name_len = strlen(name) + 1;
 212        u32 hash = jhash(name, name_len-1, 0);
 213
 214        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 215        hlist_for_each_entry(e, node, head, hlist) {
 216                if (!strcmp(name, e->name)) {
 217                        printk(KERN_NOTICE
 218                                "tracepoint %s busy\n", name);
 219                        return ERR_PTR(-EEXIST);        /* Already there */
 220                }
 221        }
 222        /*
 223         * Using kmalloc here to allocate a variable length element. Could
 224         * cause some memory fragmentation if overused.
 225         */
 226        e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
 227        if (!e)
 228                return ERR_PTR(-ENOMEM);
 229        memcpy(&e->name[0], name, name_len);
 230        e->funcs = NULL;
 231        e->refcount = 0;
 232        hlist_add_head(&e->hlist, head);
 233        return e;
 234}
 235
 236/*
 237 * Remove the tracepoint from the tracepoint hash table. Must be called with
 238 * mutex_lock held.
 239 */
 240static inline void remove_tracepoint(struct tracepoint_entry *e)
 241{
 242        hlist_del(&e->hlist);
 243        kfree(e);
 244}
 245
 246/*
 247 * Sets the probe callback corresponding to one tracepoint.
 248 */
 249static void set_tracepoint(struct tracepoint_entry **entry,
 250        struct tracepoint *elem, int active)
 251{
 252        WARN_ON(strcmp((*entry)->name, elem->name) != 0);
 253
 254        if (elem->regfunc && !jump_label_enabled(&elem->key) && active)
 255                elem->regfunc();
 256        else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active)
 257                elem->unregfunc();
 258
 259        /*
 260         * rcu_assign_pointer has a smp_wmb() which makes sure that the new
 261         * probe callbacks array is consistent before setting a pointer to it.
 262         * This array is referenced by __DO_TRACE from
 263         * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
 264         * is used.
 265         */
 266        rcu_assign_pointer(elem->funcs, (*entry)->funcs);
 267        if (active && !jump_label_enabled(&elem->key))
 268                jump_label_inc(&elem->key);
 269        else if (!active && jump_label_enabled(&elem->key))
 270                jump_label_dec(&elem->key);
 271}
 272
 273/*
 274 * Disable a tracepoint and its probe callback.
 275 * Note: only waiting an RCU period after setting elem->call to the empty
 276 * function insures that the original callback is not used anymore. This insured
 277 * by preempt_disable around the call site.
 278 */
 279static void disable_tracepoint(struct tracepoint *elem)
 280{
 281        if (elem->unregfunc && jump_label_enabled(&elem->key))
 282                elem->unregfunc();
 283
 284        if (jump_label_enabled(&elem->key))
 285                jump_label_dec(&elem->key);
 286        rcu_assign_pointer(elem->funcs, NULL);
 287}
 288
 289/**
 290 * tracepoint_update_probe_range - Update a probe range
 291 * @begin: beginning of the range
 292 * @end: end of the range
 293 *
 294 * Updates the probe callback corresponding to a range of tracepoints.
 295 */
 296void tracepoint_update_probe_range(struct tracepoint * const *begin,
 297                                   struct tracepoint * const *end)
 298{
 299        struct tracepoint * const *iter;
 300        struct tracepoint_entry *mark_entry;
 301
 302        if (!begin)
 303                return;
 304
 305        mutex_lock(&tracepoints_mutex);
 306        for (iter = begin; iter < end; iter++) {
 307                mark_entry = get_tracepoint((*iter)->name);
 308                if (mark_entry) {
 309                        set_tracepoint(&mark_entry, *iter,
 310                                        !!mark_entry->refcount);
 311                } else {
 312                        disable_tracepoint(*iter);
 313                }
 314        }
 315        mutex_unlock(&tracepoints_mutex);
 316}
 317
 318/*
 319 * Update probes, removing the faulty probes.
 320 */
 321static void tracepoint_update_probes(void)
 322{
 323        /* Core kernel tracepoints */
 324        tracepoint_update_probe_range(__start___tracepoints_ptrs,
 325                __stop___tracepoints_ptrs);
 326        /* tracepoints in modules. */
 327        module_update_tracepoints();
 328}
 329
 330static struct tracepoint_func *
 331tracepoint_add_probe(const char *name, void *probe, void *data)
 332{
 333        struct tracepoint_entry *entry;
 334        struct tracepoint_func *old;
 335
 336        entry = get_tracepoint(name);
 337        if (!entry) {
 338                entry = add_tracepoint(name);
 339                if (IS_ERR(entry))
 340                        return (struct tracepoint_func *)entry;
 341        }
 342        old = tracepoint_entry_add_probe(entry, probe, data);
 343        if (IS_ERR(old) && !entry->refcount)
 344                remove_tracepoint(entry);
 345        return old;
 346}
 347
 348/**
 349 * tracepoint_probe_register -  Connect a probe to a tracepoint
 350 * @name: tracepoint name
 351 * @probe: probe handler
 352 *
 353 * Returns 0 if ok, error value on error.
 354 * The probe address must at least be aligned on the architecture pointer size.
 355 */
 356int tracepoint_probe_register(const char *name, void *probe, void *data)
 357{
 358        struct tracepoint_func *old;
 359
 360        mutex_lock(&tracepoints_mutex);
 361        old = tracepoint_add_probe(name, probe, data);
 362        mutex_unlock(&tracepoints_mutex);
 363        if (IS_ERR(old))
 364                return PTR_ERR(old);
 365
 366        tracepoint_update_probes();             /* may update entry */
 367        release_probes(old);
 368        return 0;
 369}
 370EXPORT_SYMBOL_GPL(tracepoint_probe_register);
 371
 372static struct tracepoint_func *
 373tracepoint_remove_probe(const char *name, void *probe, void *data)
 374{
 375        struct tracepoint_entry *entry;
 376        struct tracepoint_func *old;
 377
 378        entry = get_tracepoint(name);
 379        if (!entry)
 380                return ERR_PTR(-ENOENT);
 381        old = tracepoint_entry_remove_probe(entry, probe, data);
 382        if (IS_ERR(old))
 383                return old;
 384        if (!entry->refcount)
 385                remove_tracepoint(entry);
 386        return old;
 387}
 388
 389/**
 390 * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
 391 * @name: tracepoint name
 392 * @probe: probe function pointer
 393 *
 394 * We do not need to call a synchronize_sched to make sure the probes have
 395 * finished running before doing a module unload, because the module unload
 396 * itself uses stop_machine(), which insures that every preempt disabled section
 397 * have finished.
 398 */
 399int tracepoint_probe_unregister(const char *name, void *probe, void *data)
 400{
 401        struct tracepoint_func *old;
 402
 403        mutex_lock(&tracepoints_mutex);
 404        old = tracepoint_remove_probe(name, probe, data);
 405        mutex_unlock(&tracepoints_mutex);
 406        if (IS_ERR(old))
 407                return PTR_ERR(old);
 408
 409        tracepoint_update_probes();             /* may update entry */
 410        release_probes(old);
 411        return 0;
 412}
 413EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 414
 415static LIST_HEAD(old_probes);
 416static int need_update;
 417
 418static void tracepoint_add_old_probes(void *old)
 419{
 420        need_update = 1;
 421        if (old) {
 422                struct tp_probes *tp_probes = container_of(old,
 423                        struct tp_probes, probes[0]);
 424                list_add(&tp_probes->u.list, &old_probes);
 425        }
 426}
 427
 428/**
 429 * tracepoint_probe_register_noupdate -  register a probe but not connect
 430 * @name: tracepoint name
 431 * @probe: probe handler
 432 *
 433 * caller must call tracepoint_probe_update_all()
 434 */
 435int tracepoint_probe_register_noupdate(const char *name, void *probe,
 436                                       void *data)
 437{
 438        struct tracepoint_func *old;
 439
 440        mutex_lock(&tracepoints_mutex);
 441        old = tracepoint_add_probe(name, probe, data);
 442        if (IS_ERR(old)) {
 443                mutex_unlock(&tracepoints_mutex);
 444                return PTR_ERR(old);
 445        }
 446        tracepoint_add_old_probes(old);
 447        mutex_unlock(&tracepoints_mutex);
 448        return 0;
 449}
 450EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
 451
 452/**
 453 * tracepoint_probe_unregister_noupdate -  remove a probe but not disconnect
 454 * @name: tracepoint name
 455 * @probe: probe function pointer
 456 *
 457 * caller must call tracepoint_probe_update_all()
 458 */
 459int tracepoint_probe_unregister_noupdate(const char *name, void *probe,
 460                                         void *data)
 461{
 462        struct tracepoint_func *old;
 463
 464        mutex_lock(&tracepoints_mutex);
 465        old = tracepoint_remove_probe(name, probe, data);
 466        if (IS_ERR(old)) {
 467                mutex_unlock(&tracepoints_mutex);
 468                return PTR_ERR(old);
 469        }
 470        tracepoint_add_old_probes(old);
 471        mutex_unlock(&tracepoints_mutex);
 472        return 0;
 473}
 474EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
 475
 476/**
 477 * tracepoint_probe_update_all -  update tracepoints
 478 */
 479void tracepoint_probe_update_all(void)
 480{
 481        LIST_HEAD(release_probes);
 482        struct tp_probes *pos, *next;
 483
 484        mutex_lock(&tracepoints_mutex);
 485        if (!need_update) {
 486                mutex_unlock(&tracepoints_mutex);
 487                return;
 488        }
 489        if (!list_empty(&old_probes))
 490                list_replace_init(&old_probes, &release_probes);
 491        need_update = 0;
 492        mutex_unlock(&tracepoints_mutex);
 493
 494        tracepoint_update_probes();
 495        list_for_each_entry_safe(pos, next, &release_probes, u.list) {
 496                list_del(&pos->u.list);
 497                call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
 498        }
 499}
 500EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
 501
 502/**
 503 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 504 * @tracepoint: current tracepoints (in), next tracepoint (out)
 505 * @begin: beginning of the range
 506 * @end: end of the range
 507 *
 508 * Returns whether a next tracepoint has been found (1) or not (0).
 509 * Will return the first tracepoint in the range if the input tracepoint is
 510 * NULL.
 511 */
 512int tracepoint_get_iter_range(struct tracepoint * const **tracepoint,
 513        struct tracepoint * const *begin, struct tracepoint * const *end)
 514{
 515        if (!*tracepoint && begin != end) {
 516                *tracepoint = begin;
 517                return 1;
 518        }
 519        if (*tracepoint >= begin && *tracepoint < end)
 520                return 1;
 521        return 0;
 522}
 523EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
 524
 525static void tracepoint_get_iter(struct tracepoint_iter *iter)
 526{
 527        int found = 0;
 528
 529        /* Core kernel tracepoints */
 530        if (!iter->module) {
 531                found = tracepoint_get_iter_range(&iter->tracepoint,
 532                                __start___tracepoints_ptrs,
 533                                __stop___tracepoints_ptrs);
 534                if (found)
 535                        goto end;
 536        }
 537        /* tracepoints in modules. */
 538        found = module_get_iter_tracepoints(iter);
 539end:
 540        if (!found)
 541                tracepoint_iter_reset(iter);
 542}
 543
 544void tracepoint_iter_start(struct tracepoint_iter *iter)
 545{
 546        tracepoint_get_iter(iter);
 547}
 548EXPORT_SYMBOL_GPL(tracepoint_iter_start);
 549
 550void tracepoint_iter_next(struct tracepoint_iter *iter)
 551{
 552        iter->tracepoint++;
 553        /*
 554         * iter->tracepoint may be invalid because we blindly incremented it.
 555         * Make sure it is valid by marshalling on the tracepoints, getting the
 556         * tracepoints from following modules if necessary.
 557         */
 558        tracepoint_get_iter(iter);
 559}
 560EXPORT_SYMBOL_GPL(tracepoint_iter_next);
 561
 562void tracepoint_iter_stop(struct tracepoint_iter *iter)
 563{
 564}
 565EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
 566
 567void tracepoint_iter_reset(struct tracepoint_iter *iter)
 568{
 569        iter->module = NULL;
 570        iter->tracepoint = NULL;
 571}
 572EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
 573
 574#ifdef CONFIG_MODULES
 575
 576int tracepoint_module_notify(struct notifier_block *self,
 577                             unsigned long val, void *data)
 578{
 579        struct module *mod = data;
 580
 581        switch (val) {
 582        case MODULE_STATE_COMING:
 583        case MODULE_STATE_GOING:
 584                tracepoint_update_probe_range(mod->tracepoints_ptrs,
 585                        mod->tracepoints_ptrs + mod->num_tracepoints);
 586                break;
 587        }
 588        return 0;
 589}
 590
 591struct notifier_block tracepoint_module_nb = {
 592        .notifier_call = tracepoint_module_notify,
 593        .priority = 0,
 594};
 595
 596static int init_tracepoints(void)
 597{
 598        return register_module_notifier(&tracepoint_module_nb);
 599}
 600__initcall(init_tracepoints);
 601
 602#endif /* CONFIG_MODULES */
 603
 604#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 605
 606/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 607static int sys_tracepoint_refcount;
 608
 609void syscall_regfunc(void)
 610{
 611        unsigned long flags;
 612        struct task_struct *g, *t;
 613
 614        if (!sys_tracepoint_refcount) {
 615                read_lock_irqsave(&tasklist_lock, flags);
 616                do_each_thread(g, t) {
 617                        /* Skip kernel threads. */
 618                        if (t->mm)
 619                                set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 620                } while_each_thread(g, t);
 621                read_unlock_irqrestore(&tasklist_lock, flags);
 622        }
 623        sys_tracepoint_refcount++;
 624}
 625
 626void syscall_unregfunc(void)
 627{
 628        unsigned long flags;
 629        struct task_struct *g, *t;
 630
 631        sys_tracepoint_refcount--;
 632        if (!sys_tracepoint_refcount) {
 633                read_lock_irqsave(&tasklist_lock, flags);
 634                do_each_thread(g, t) {
 635                        clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 636                } while_each_thread(g, t);
 637                read_unlock_irqrestore(&tasklist_lock, flags);
 638        }
 639}
 640#endif
 641
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.