linux/kernel/trace/trace_kprobe.c
<<
>>
Prefs
   1/*
   2 * Kprobes-based tracing events
   3 *
   4 * Created by Masami Hiramatsu <mhiramat@redhat.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, write to the Free Software
  17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18 */
  19
  20#include <linux/module.h>
  21#include <linux/uaccess.h>
  22
  23#include "trace_probe.h"
  24
  25#define KPROBE_EVENT_SYSTEM "kprobes"
  26
  27/**
  28 * Kprobe event core functions
  29 */
  30
  31struct trace_probe {
  32        struct list_head        list;
  33        struct kretprobe        rp;     /* Use rp.kp for kprobe use */
  34        unsigned long           nhit;
  35        unsigned int            flags;  /* For TP_FLAG_* */
  36        const char              *symbol;        /* symbol name */
  37        struct ftrace_event_class       class;
  38        struct ftrace_event_call        call;
  39        ssize_t                 size;           /* trace entry size */
  40        unsigned int            nr_args;
  41        struct probe_arg        args[];
  42};
  43
  44#define SIZEOF_TRACE_PROBE(n)                   \
  45        (offsetof(struct trace_probe, args) +   \
  46        (sizeof(struct probe_arg) * (n)))
  47
  48
  49static __kprobes int trace_probe_is_return(struct trace_probe *tp)
  50{
  51        return tp->rp.handler != NULL;
  52}
  53
  54static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
  55{
  56        return tp->symbol ? tp->symbol : "unknown";
  57}
  58
  59static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
  60{
  61        return tp->rp.kp.offset;
  62}
  63
  64static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
  65{
  66        return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
  67}
  68
  69static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
  70{
  71        return !!(tp->flags & TP_FLAG_REGISTERED);
  72}
  73
  74static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
  75{
  76        return !!(kprobe_gone(&tp->rp.kp));
  77}
  78
  79static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
  80                                                struct module *mod)
  81{
  82        int len = strlen(mod->name);
  83        const char *name = trace_probe_symbol(tp);
  84        return strncmp(mod->name, name, len) == 0 && name[len] == ':';
  85}
  86
  87static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
  88{
  89        return !!strchr(trace_probe_symbol(tp), ':');
  90}
  91
  92static int register_probe_event(struct trace_probe *tp);
  93static void unregister_probe_event(struct trace_probe *tp);
  94
  95static DEFINE_MUTEX(probe_lock);
  96static LIST_HEAD(probe_list);
  97
  98static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
  99static int kretprobe_dispatcher(struct kretprobe_instance *ri,
 100                                struct pt_regs *regs);
 101
 102/*
 103 * Allocate new trace_probe and initialize it (including kprobes).
 104 */
 105static struct trace_probe *alloc_trace_probe(const char *group,
 106                                             const char *event,
 107                                             void *addr,
 108                                             const char *symbol,
 109                                             unsigned long offs,
 110                                             int nargs, bool is_return)
 111{
 112        struct trace_probe *tp;
 113        int ret = -ENOMEM;
 114
 115        tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
 116        if (!tp)
 117                return ERR_PTR(ret);
 118
 119        if (symbol) {
 120                tp->symbol = kstrdup(symbol, GFP_KERNEL);
 121                if (!tp->symbol)
 122                        goto error;
 123                tp->rp.kp.symbol_name = tp->symbol;
 124                tp->rp.kp.offset = offs;
 125        } else
 126                tp->rp.kp.addr = addr;
 127
 128        if (is_return)
 129                tp->rp.handler = kretprobe_dispatcher;
 130        else
 131                tp->rp.kp.pre_handler = kprobe_dispatcher;
 132
 133        if (!event || !is_good_name(event)) {
 134                ret = -EINVAL;
 135                goto error;
 136        }
 137
 138        tp->call.class = &tp->class;
 139        tp->call.name = kstrdup(event, GFP_KERNEL);
 140        if (!tp->call.name)
 141                goto error;
 142
 143        if (!group || !is_good_name(group)) {
 144                ret = -EINVAL;
 145                goto error;
 146        }
 147
 148        tp->class.system = kstrdup(group, GFP_KERNEL);
 149        if (!tp->class.system)
 150                goto error;
 151
 152        INIT_LIST_HEAD(&tp->list);
 153        return tp;
 154error:
 155        kfree(tp->call.name);
 156        kfree(tp->symbol);
 157        kfree(tp);
 158        return ERR_PTR(ret);
 159}
 160
 161static void free_trace_probe(struct trace_probe *tp)
 162{
 163        int i;
 164
 165        for (i = 0; i < tp->nr_args; i++)
 166                traceprobe_free_probe_arg(&tp->args[i]);
 167
 168        kfree(tp->call.class->system);
 169        kfree(tp->call.name);
 170        kfree(tp->symbol);
 171        kfree(tp);
 172}
 173
 174static struct trace_probe *find_trace_probe(const char *event,
 175                                            const char *group)
 176{
 177        struct trace_probe *tp;
 178
 179        list_for_each_entry(tp, &probe_list, list)
 180                if (strcmp(tp->call.name, event) == 0 &&
 181                    strcmp(tp->call.class->system, group) == 0)
 182                        return tp;
 183        return NULL;
 184}
 185
 186/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
 187static int enable_trace_probe(struct trace_probe *tp, int flag)
 188{
 189        int ret = 0;
 190
 191        tp->flags |= flag;
 192        if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
 193            !trace_probe_has_gone(tp)) {
 194                if (trace_probe_is_return(tp))
 195                        ret = enable_kretprobe(&tp->rp);
 196                else
 197                        ret = enable_kprobe(&tp->rp.kp);
 198        }
 199
 200        return ret;
 201}
 202
 203/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
 204static void disable_trace_probe(struct trace_probe *tp, int flag)
 205{
 206        tp->flags &= ~flag;
 207        if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
 208                if (trace_probe_is_return(tp))
 209                        disable_kretprobe(&tp->rp);
 210                else
 211                        disable_kprobe(&tp->rp.kp);
 212        }
 213}
 214
 215/* Internal register function - just handle k*probes and flags */
 216static int __register_trace_probe(struct trace_probe *tp)
 217{
 218        int i, ret;
 219
 220        if (trace_probe_is_registered(tp))
 221                return -EINVAL;
 222
 223        for (i = 0; i < tp->nr_args; i++)
 224                traceprobe_update_arg(&tp->args[i]);
 225
 226        /* Set/clear disabled flag according to tp->flag */
 227        if (trace_probe_is_enabled(tp))
 228                tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
 229        else
 230                tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
 231
 232        if (trace_probe_is_return(tp))
 233                ret = register_kretprobe(&tp->rp);
 234        else
 235                ret = register_kprobe(&tp->rp.kp);
 236
 237        if (ret == 0)
 238                tp->flags |= TP_FLAG_REGISTERED;
 239        else {
 240                pr_warning("Could not insert probe at %s+%lu: %d\n",
 241                           trace_probe_symbol(tp), trace_probe_offset(tp), ret);
 242                if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
 243                        pr_warning("This probe might be able to register after"
 244                                   "target module is loaded. Continue.\n");
 245                        ret = 0;
 246                } else if (ret == -EILSEQ) {
 247                        pr_warning("Probing address(0x%p) is not an "
 248                                   "instruction boundary.\n",
 249                                   tp->rp.kp.addr);
 250                        ret = -EINVAL;
 251                }
 252        }
 253
 254        return ret;
 255}
 256
 257/* Internal unregister function - just handle k*probes and flags */
 258static void __unregister_trace_probe(struct trace_probe *tp)
 259{
 260        if (trace_probe_is_registered(tp)) {
 261                if (trace_probe_is_return(tp))
 262                        unregister_kretprobe(&tp->rp);
 263                else
 264                        unregister_kprobe(&tp->rp.kp);
 265                tp->flags &= ~TP_FLAG_REGISTERED;
 266                /* Cleanup kprobe for reuse */
 267                if (tp->rp.kp.symbol_name)
 268                        tp->rp.kp.addr = NULL;
 269        }
 270}
 271
 272/* Unregister a trace_probe and probe_event: call with locking probe_lock */
 273static int unregister_trace_probe(struct trace_probe *tp)
 274{
 275        /* Enabled event can not be unregistered */
 276        if (trace_probe_is_enabled(tp))
 277                return -EBUSY;
 278
 279        __unregister_trace_probe(tp);
 280        list_del(&tp->list);
 281        unregister_probe_event(tp);
 282
 283        return 0;
 284}
 285
 286/* Register a trace_probe and probe_event */
 287static int register_trace_probe(struct trace_probe *tp)
 288{
 289        struct trace_probe *old_tp;
 290        int ret;
 291
 292        mutex_lock(&probe_lock);
 293
 294        /* Delete old (same name) event if exist */
 295        old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
 296        if (old_tp) {
 297                ret = unregister_trace_probe(old_tp);
 298                if (ret < 0)
 299                        goto end;
 300                free_trace_probe(old_tp);
 301        }
 302
 303        /* Register new event */
 304        ret = register_probe_event(tp);
 305        if (ret) {
 306                pr_warning("Failed to register probe event(%d)\n", ret);
 307                goto end;
 308        }
 309
 310        /* Register k*probe */
 311        ret = __register_trace_probe(tp);
 312        if (ret < 0)
 313                unregister_probe_event(tp);
 314        else
 315                list_add_tail(&tp->list, &probe_list);
 316
 317end:
 318        mutex_unlock(&probe_lock);
 319        return ret;
 320}
 321
 322/* Module notifier call back, checking event on the module */
 323static int trace_probe_module_callback(struct notifier_block *nb,
 324                                       unsigned long val, void *data)
 325{
 326        struct module *mod = data;
 327        struct trace_probe *tp;
 328        int ret;
 329
 330        if (val != MODULE_STATE_COMING)
 331                return NOTIFY_DONE;
 332
 333        /* Update probes on coming module */
 334        mutex_lock(&probe_lock);
 335        list_for_each_entry(tp, &probe_list, list) {
 336                if (trace_probe_within_module(tp, mod)) {
 337                        /* Don't need to check busy - this should have gone. */
 338                        __unregister_trace_probe(tp);
 339                        ret = __register_trace_probe(tp);
 340                        if (ret)
 341                                pr_warning("Failed to re-register probe %s on"
 342                                           "%s: %d\n",
 343                                           tp->call.name, mod->name, ret);
 344                }
 345        }
 346        mutex_unlock(&probe_lock);
 347
 348        return NOTIFY_DONE;
 349}
 350
 351static struct notifier_block trace_probe_module_nb = {
 352        .notifier_call = trace_probe_module_callback,
 353        .priority = 1   /* Invoked after kprobe module callback */
 354};
 355
 356static int create_trace_probe(int argc, char **argv)
 357{
 358        /*
 359         * Argument syntax:
 360         *  - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
 361         *  - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
 362         * Fetch args:
 363         *  $retval     : fetch return value
 364         *  $stack      : fetch stack address
 365         *  $stackN     : fetch Nth of stack (N:0-)
 366         *  @ADDR       : fetch memory at ADDR (ADDR should be in kernel)
 367         *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
 368         *  %REG        : fetch register REG
 369         * Dereferencing memory fetch:
 370         *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
 371         * Alias name of args:
 372         *  NAME=FETCHARG : set NAME as alias of FETCHARG.
 373         * Type of args:
 374         *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 375         */
 376        struct trace_probe *tp;
 377        int i, ret = 0;
 378        bool is_return = false, is_delete = false;
 379        char *symbol = NULL, *event = NULL, *group = NULL;
 380        char *arg;
 381        unsigned long offset = 0;
 382        void *addr = NULL;
 383        char buf[MAX_EVENT_NAME_LEN];
 384
 385        /* argc must be >= 1 */
 386        if (argv[0][0] == 'p')
 387                is_return = false;
 388        else if (argv[0][0] == 'r')
 389                is_return = true;
 390        else if (argv[0][0] == '-')
 391                is_delete = true;
 392        else {
 393                pr_info("Probe definition must be started with 'p', 'r' or"
 394                        " '-'.\n");
 395                return -EINVAL;
 396        }
 397
 398        if (argv[0][1] == ':') {
 399                event = &argv[0][2];
 400                if (strchr(event, '/')) {
 401                        group = event;
 402                        event = strchr(group, '/') + 1;
 403                        event[-1] = '\0';
 404                        if (strlen(group) == 0) {
 405                                pr_info("Group name is not specified\n");
 406                                return -EINVAL;
 407                        }
 408                }
 409                if (strlen(event) == 0) {
 410                        pr_info("Event name is not specified\n");
 411                        return -EINVAL;
 412                }
 413        }
 414        if (!group)
 415                group = KPROBE_EVENT_SYSTEM;
 416
 417        if (is_delete) {
 418                if (!event) {
 419                        pr_info("Delete command needs an event name.\n");
 420                        return -EINVAL;
 421                }
 422                mutex_lock(&probe_lock);
 423                tp = find_trace_probe(event, group);
 424                if (!tp) {
 425                        mutex_unlock(&probe_lock);
 426                        pr_info("Event %s/%s doesn't exist.\n", group, event);
 427                        return -ENOENT;
 428                }
 429                /* delete an event */
 430                ret = unregister_trace_probe(tp);
 431                if (ret == 0)
 432                        free_trace_probe(tp);
 433                mutex_unlock(&probe_lock);
 434                return ret;
 435        }
 436
 437        if (argc < 2) {
 438                pr_info("Probe point is not specified.\n");
 439                return -EINVAL;
 440        }
 441        if (isdigit(argv[1][0])) {
 442                if (is_return) {
 443                        pr_info("Return probe point must be a symbol.\n");
 444                        return -EINVAL;
 445                }
 446                /* an address specified */
 447                ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
 448                if (ret) {
 449                        pr_info("Failed to parse address.\n");
 450                        return ret;
 451                }
 452        } else {
 453                /* a symbol specified */
 454                symbol = argv[1];
 455                /* TODO: support .init module functions */
 456                ret = traceprobe_split_symbol_offset(symbol, &offset);
 457                if (ret) {
 458                        pr_info("Failed to parse symbol.\n");
 459                        return ret;
 460                }
 461                if (offset && is_return) {
 462                        pr_info("Return probe must be used without offset.\n");
 463                        return -EINVAL;
 464                }
 465        }
 466        argc -= 2; argv += 2;
 467
 468        /* setup a probe */
 469        if (!event) {
 470                /* Make a new event name */
 471                if (symbol)
 472                        snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
 473                                 is_return ? 'r' : 'p', symbol, offset);
 474                else
 475                        snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
 476                                 is_return ? 'r' : 'p', addr);
 477                event = buf;
 478        }
 479        tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
 480                               is_return);
 481        if (IS_ERR(tp)) {
 482                pr_info("Failed to allocate trace_probe.(%d)\n",
 483                        (int)PTR_ERR(tp));
 484                return PTR_ERR(tp);
 485        }
 486
 487        /* parse arguments */
 488        ret = 0;
 489        for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
 490                /* Increment count for freeing args in error case */
 491                tp->nr_args++;
 492
 493                /* Parse argument name */
 494                arg = strchr(argv[i], '=');
 495                if (arg) {
 496                        *arg++ = '\0';
 497                        tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
 498                } else {
 499                        arg = argv[i];
 500                        /* If argument name is omitted, set "argN" */
 501                        snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
 502                        tp->args[i].name = kstrdup(buf, GFP_KERNEL);
 503                }
 504
 505                if (!tp->args[i].name) {
 506                        pr_info("Failed to allocate argument[%d] name.\n", i);
 507                        ret = -ENOMEM;
 508                        goto error;
 509                }
 510
 511                if (!is_good_name(tp->args[i].name)) {
 512                        pr_info("Invalid argument[%d] name: %s\n",
 513                                i, tp->args[i].name);
 514                        ret = -EINVAL;
 515                        goto error;
 516                }
 517
 518                if (traceprobe_conflict_field_name(tp->args[i].name,
 519                                                        tp->args, i)) {
 520                        pr_info("Argument[%d] name '%s' conflicts with "
 521                                "another field.\n", i, argv[i]);
 522                        ret = -EINVAL;
 523                        goto error;
 524                }
 525
 526                /* Parse fetch argument */
 527                ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
 528                                                is_return, true);
 529                if (ret) {
 530                        pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
 531                        goto error;
 532                }
 533        }
 534
 535        ret = register_trace_probe(tp);
 536        if (ret)
 537                goto error;
 538        return 0;
 539
 540error:
 541        free_trace_probe(tp);
 542        return ret;
 543}
 544
 545static int release_all_trace_probes(void)
 546{
 547        struct trace_probe *tp;
 548        int ret = 0;
 549
 550        mutex_lock(&probe_lock);
 551        /* Ensure no probe is in use. */
 552        list_for_each_entry(tp, &probe_list, list)
 553                if (trace_probe_is_enabled(tp)) {
 554                        ret = -EBUSY;
 555                        goto end;
 556                }
 557        /* TODO: Use batch unregistration */
 558        while (!list_empty(&probe_list)) {
 559                tp = list_entry(probe_list.next, struct trace_probe, list);
 560                unregister_trace_probe(tp);
 561                free_trace_probe(tp);
 562        }
 563
 564end:
 565        mutex_unlock(&probe_lock);
 566
 567        return ret;
 568}
 569
 570/* Probes listing interfaces */
 571static void *probes_seq_start(struct seq_file *m, loff_t *pos)
 572{
 573        mutex_lock(&probe_lock);
 574        return seq_list_start(&probe_list, *pos);
 575}
 576
 577static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
 578{
 579        return seq_list_next(v, &probe_list, pos);
 580}
 581
 582static void probes_seq_stop(struct seq_file *m, void *v)
 583{
 584        mutex_unlock(&probe_lock);
 585}
 586
 587static int probes_seq_show(struct seq_file *m, void *v)
 588{
 589        struct trace_probe *tp = v;
 590        int i;
 591
 592        seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
 593        seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
 594
 595        if (!tp->symbol)
 596                seq_printf(m, " 0x%p", tp->rp.kp.addr);
 597        else if (tp->rp.kp.offset)
 598                seq_printf(m, " %s+%u", trace_probe_symbol(tp),
 599                           tp->rp.kp.offset);
 600        else
 601                seq_printf(m, " %s", trace_probe_symbol(tp));
 602
 603        for (i = 0; i < tp->nr_args; i++)
 604                seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
 605        seq_printf(m, "\n");
 606
 607        return 0;
 608}
 609
 610static const struct seq_operations probes_seq_op = {
 611        .start  = probes_seq_start,
 612        .next   = probes_seq_next,
 613        .stop   = probes_seq_stop,
 614        .show   = probes_seq_show
 615};
 616
 617static int probes_open(struct inode *inode, struct file *file)
 618{
 619        int ret;
 620
 621        if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
 622                ret = release_all_trace_probes();
 623                if (ret < 0)
 624                        return ret;
 625        }
 626
 627        return seq_open(file, &probes_seq_op);
 628}
 629
 630static ssize_t probes_write(struct file *file, const char __user *buffer,
 631                            size_t count, loff_t *ppos)
 632{
 633        return traceprobe_probes_write(file, buffer, count, ppos,
 634                        create_trace_probe);
 635}
 636
 637static const struct file_operations kprobe_events_ops = {
 638        .owner          = THIS_MODULE,
 639        .open           = probes_open,
 640        .read           = seq_read,
 641        .llseek         = seq_lseek,
 642        .release        = seq_release,
 643        .write          = probes_write,
 644};
 645
 646/* Probes profiling interfaces */
 647static int probes_profile_seq_show(struct seq_file *m, void *v)
 648{
 649        struct trace_probe *tp = v;
 650
 651        seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
 652                   tp->rp.kp.nmissed);
 653
 654        return 0;
 655}
 656
 657static const struct seq_operations profile_seq_op = {
 658        .start  = probes_seq_start,
 659        .next   = probes_seq_next,
 660        .stop   = probes_seq_stop,
 661        .show   = probes_profile_seq_show
 662};
 663
 664static int profile_open(struct inode *inode, struct file *file)
 665{
 666        return seq_open(file, &profile_seq_op);
 667}
 668
 669static const struct file_operations kprobe_profile_ops = {
 670        .owner          = THIS_MODULE,
 671        .open           = profile_open,
 672        .read           = seq_read,
 673        .llseek         = seq_lseek,
 674        .release        = seq_release,
 675};
 676
 677/* Sum up total data length for dynamic arraies (strings) */
 678static __kprobes int __get_data_size(struct trace_probe *tp,
 679                                     struct pt_regs *regs)
 680{
 681        int i, ret = 0;
 682        u32 len;
 683
 684        for (i = 0; i < tp->nr_args; i++)
 685                if (unlikely(tp->args[i].fetch_size.fn)) {
 686                        call_fetch(&tp->args[i].fetch_size, regs, &len);
 687                        ret += len;
 688                }
 689
 690        return ret;
 691}
 692
 693/* Store the value of each argument */
 694static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
 695                                       struct pt_regs *regs,
 696                                       u8 *data, int maxlen)
 697{
 698        int i;
 699        u32 end = tp->size;
 700        u32 *dl;        /* Data (relative) location */
 701
 702        for (i = 0; i < tp->nr_args; i++) {
 703                if (unlikely(tp->args[i].fetch_size.fn)) {
 704                        /*
 705                         * First, we set the relative location and
 706                         * maximum data length to *dl
 707                         */
 708                        dl = (u32 *)(data + tp->args[i].offset);
 709                        *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
 710                        /* Then try to fetch string or dynamic array data */
 711                        call_fetch(&tp->args[i].fetch, regs, dl);
 712                        /* Reduce maximum length */
 713                        end += get_rloc_len(*dl);
 714                        maxlen -= get_rloc_len(*dl);
 715                        /* Trick here, convert data_rloc to data_loc */
 716                        *dl = convert_rloc_to_loc(*dl,
 717                                 ent_size + tp->args[i].offset);
 718                } else
 719                        /* Just fetching data normally */
 720                        call_fetch(&tp->args[i].fetch, regs,
 721                                   data + tp->args[i].offset);
 722        }
 723}
 724
 725/* Kprobe handler */
 726static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 727{
 728        struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 729        struct kprobe_trace_entry_head *entry;
 730        struct ring_buffer_event *event;
 731        struct ring_buffer *buffer;
 732        int size, dsize, pc;
 733        unsigned long irq_flags;
 734        struct ftrace_event_call *call = &tp->call;
 735
 736        tp->nhit++;
 737
 738        local_save_flags(irq_flags);
 739        pc = preempt_count();
 740
 741        dsize = __get_data_size(tp, regs);
 742        size = sizeof(*entry) + tp->size + dsize;
 743
 744        event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
 745                                                  size, irq_flags, pc);
 746        if (!event)
 747                return;
 748
 749        entry = ring_buffer_event_data(event);
 750        entry->ip = (unsigned long)kp->addr;
 751        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 752
 753        if (!filter_current_check_discard(buffer, call, entry, event))
 754                trace_nowake_buffer_unlock_commit_regs(buffer, event,
 755                                                       irq_flags, pc, regs);
 756}
 757
 758/* Kretprobe handler */
 759static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
 760                                          struct pt_regs *regs)
 761{
 762        struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
 763        struct kretprobe_trace_entry_head *entry;
 764        struct ring_buffer_event *event;
 765        struct ring_buffer *buffer;
 766        int size, pc, dsize;
 767        unsigned long irq_flags;
 768        struct ftrace_event_call *call = &tp->call;
 769
 770        local_save_flags(irq_flags);
 771        pc = preempt_count();
 772
 773        dsize = __get_data_size(tp, regs);
 774        size = sizeof(*entry) + tp->size + dsize;
 775
 776        event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
 777                                                  size, irq_flags, pc);
 778        if (!event)
 779                return;
 780
 781        entry = ring_buffer_event_data(event);
 782        entry->func = (unsigned long)tp->rp.kp.addr;
 783        entry->ret_ip = (unsigned long)ri->ret_addr;
 784        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
 785
 786        if (!filter_current_check_discard(buffer, call, entry, event))
 787                trace_nowake_buffer_unlock_commit_regs(buffer, event,
 788                                                       irq_flags, pc, regs);
 789}
 790
 791/* Event entry printers */
 792enum print_line_t
 793print_kprobe_event(struct trace_iterator *iter, int flags,
 794                   struct trace_event *event)
 795{
 796        struct kprobe_trace_entry_head *field;
 797        struct trace_seq *s = &iter->seq;
 798        struct trace_probe *tp;
 799        u8 *data;
 800        int i;
 801
 802        field = (struct kprobe_trace_entry_head *)iter->ent;
 803        tp = container_of(event, struct trace_probe, call.event);
 804
 805        if (!trace_seq_printf(s, "%s: (", tp->call.name))
 806                goto partial;
 807
 808        if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
 809                goto partial;
 810
 811        if (!trace_seq_puts(s, ")"))
 812                goto partial;
 813
 814        data = (u8 *)&field[1];
 815        for (i = 0; i < tp->nr_args; i++)
 816                if (!tp->args[i].type->print(s, tp->args[i].name,
 817                                             data + tp->args[i].offset, field))
 818                        goto partial;
 819
 820        if (!trace_seq_puts(s, "\n"))
 821                goto partial;
 822
 823        return TRACE_TYPE_HANDLED;
 824partial:
 825        return TRACE_TYPE_PARTIAL_LINE;
 826}
 827
 828enum print_line_t
 829print_kretprobe_event(struct trace_iterator *iter, int flags,
 830                      struct trace_event *event)
 831{
 832        struct kretprobe_trace_entry_head *field;
 833        struct trace_seq *s = &iter->seq;
 834        struct trace_probe *tp;
 835        u8 *data;
 836        int i;
 837
 838        field = (struct kretprobe_trace_entry_head *)iter->ent;
 839        tp = container_of(event, struct trace_probe, call.event);
 840
 841        if (!trace_seq_printf(s, "%s: (", tp->call.name))
 842                goto partial;
 843
 844        if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
 845                goto partial;
 846
 847        if (!trace_seq_puts(s, " <- "))
 848                goto partial;
 849
 850        if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
 851                goto partial;
 852
 853        if (!trace_seq_puts(s, ")"))
 854                goto partial;
 855
 856        data = (u8 *)&field[1];
 857        for (i = 0; i < tp->nr_args; i++)
 858                if (!tp->args[i].type->print(s, tp->args[i].name,
 859                                             data + tp->args[i].offset, field))
 860                        goto partial;
 861
 862        if (!trace_seq_puts(s, "\n"))
 863                goto partial;
 864
 865        return TRACE_TYPE_HANDLED;
 866partial:
 867        return TRACE_TYPE_PARTIAL_LINE;
 868}
 869
 870
 871static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
 872{
 873        int ret, i;
 874        struct kprobe_trace_entry_head field;
 875        struct trace_probe *tp = (struct trace_probe *)event_call->data;
 876
 877        DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
 878        /* Set argument names as fields */
 879        for (i = 0; i < tp->nr_args; i++) {
 880                ret = trace_define_field(event_call, tp->args[i].type->fmttype,
 881                                         tp->args[i].name,
 882                                         sizeof(field) + tp->args[i].offset,
 883                                         tp->args[i].type->size,
 884                                         tp->args[i].type->is_signed,
 885                                         FILTER_OTHER);
 886                if (ret)
 887                        return ret;
 888        }
 889        return 0;
 890}
 891
 892static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
 893{
 894        int ret, i;
 895        struct kretprobe_trace_entry_head field;
 896        struct trace_probe *tp = (struct trace_probe *)event_call->data;
 897
 898        DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
 899        DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
 900        /* Set argument names as fields */
 901        for (i = 0; i < tp->nr_args; i++) {
 902                ret = trace_define_field(event_call, tp->args[i].type->fmttype,
 903                                         tp->args[i].name,
 904                                         sizeof(field) + tp->args[i].offset,
 905                                         tp->args[i].type->size,
 906                                         tp->args[i].type->is_signed,
 907                                         FILTER_OTHER);
 908                if (ret)
 909                        return ret;
 910        }
 911        return 0;
 912}
 913
 914static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
 915{
 916        int i;
 917        int pos = 0;
 918
 919        const char *fmt, *arg;
 920
 921        if (!trace_probe_is_return(tp)) {
 922                fmt = "(%lx)";
 923                arg = "REC->" FIELD_STRING_IP;
 924        } else {
 925                fmt = "(%lx <- %lx)";
 926                arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
 927        }
 928
 929        /* When len=0, we just calculate the needed length */
 930#define LEN_OR_ZERO (len ? len - pos : 0)
 931
 932        pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
 933
 934        for (i = 0; i < tp->nr_args; i++) {
 935                pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
 936                                tp->args[i].name, tp->args[i].type->fmt);
 937        }
 938
 939        pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
 940
 941        for (i = 0; i < tp->nr_args; i++) {
 942                if (strcmp(tp->args[i].type->name, "string") == 0)
 943                        pos += snprintf(buf + pos, LEN_OR_ZERO,
 944                                        ", __get_str(%s)",
 945                                        tp->args[i].name);
 946                else
 947                        pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
 948                                        tp->args[i].name);
 949        }
 950
 951#undef LEN_OR_ZERO
 952
 953        /* return the length of print_fmt */
 954        return pos;
 955}
 956
 957static int set_print_fmt(struct trace_probe *tp)
 958{
 959        int len;
 960        char *print_fmt;
 961
 962        /* First: called with 0 length to calculate the needed length */
 963        len = __set_print_fmt(tp, NULL, 0);
 964        print_fmt = kmalloc(len + 1, GFP_KERNEL);
 965        if (!print_fmt)
 966                return -ENOMEM;
 967
 968        /* Second: actually write the @print_fmt */
 969        __set_print_fmt(tp, print_fmt, len + 1);
 970        tp->call.print_fmt = print_fmt;
 971
 972        return 0;
 973}
 974
 975#ifdef CONFIG_PERF_EVENTS
 976
 977/* Kprobe profile handler */
 978static __kprobes void kprobe_perf_func(struct kprobe *kp,
 979                                         struct pt_regs *regs)
 980{
 981        struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
 982        struct ftrace_event_call *call = &tp->call;
 983        struct kprobe_trace_entry_head *entry;
 984        struct hlist_head *head;
 985        int size, __size, dsize;
 986        int rctx;
 987
 988        dsize = __get_data_size(tp, regs);
 989        __size = sizeof(*entry) + tp->size + dsize;
 990        size = ALIGN(__size + sizeof(u32), sizeof(u64));
 991        size -= sizeof(u32);
 992        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
 993                     "profile buffer not large enough"))
 994                return;
 995
 996        entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 997        if (!entry)
 998                return;
 999
1000        entry->ip = (unsigned long)kp->addr;
1001        memset(&entry[1], 0, dsize);
1002        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003
1004        head = this_cpu_ptr(call->perf_events);
1005        perf_trace_buf_submit(entry, size, rctx,
1006                                        entry->ip, 1, regs, head, NULL);
1007}
1008
1009/* Kretprobe profile handler */
1010static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1011                                            struct pt_regs *regs)
1012{
1013        struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1014        struct ftrace_event_call *call = &tp->call;
1015        struct kretprobe_trace_entry_head *entry;
1016        struct hlist_head *head;
1017        int size, __size, dsize;
1018        int rctx;
1019
1020        dsize = __get_data_size(tp, regs);
1021        __size = sizeof(*entry) + tp->size + dsize;
1022        size = ALIGN(__size + sizeof(u32), sizeof(u64));
1023        size -= sizeof(u32);
1024        if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1025                     "profile buffer not large enough"))
1026                return;
1027
1028        entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1029        if (!entry)
1030                return;
1031
1032        entry->func = (unsigned long)tp->rp.kp.addr;
1033        entry->ret_ip = (unsigned long)ri->ret_addr;
1034        store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1035
1036        head = this_cpu_ptr(call->perf_events);
1037        perf_trace_buf_submit(entry, size, rctx,
1038                                        entry->ret_ip, 1, regs, head, NULL);
1039}
1040#endif  /* CONFIG_PERF_EVENTS */
1041
1042static __kprobes
1043int kprobe_register(struct ftrace_event_call *event,
1044                    enum trace_reg type, void *data)
1045{
1046        struct trace_probe *tp = (struct trace_probe *)event->data;
1047
1048        switch (type) {
1049        case TRACE_REG_REGISTER:
1050                return enable_trace_probe(tp, TP_FLAG_TRACE);
1051        case TRACE_REG_UNREGISTER:
1052                disable_trace_probe(tp, TP_FLAG_TRACE);
1053                return 0;
1054
1055#ifdef CONFIG_PERF_EVENTS
1056        case TRACE_REG_PERF_REGISTER:
1057                return enable_trace_probe(tp, TP_FLAG_PROFILE);
1058        case TRACE_REG_PERF_UNREGISTER:
1059                disable_trace_probe(tp, TP_FLAG_PROFILE);
1060                return 0;
1061        case TRACE_REG_PERF_OPEN:
1062        case TRACE_REG_PERF_CLOSE:
1063        case TRACE_REG_PERF_ADD:
1064        case TRACE_REG_PERF_DEL:
1065                return 0;
1066#endif
1067        }
1068        return 0;
1069}
1070
1071static __kprobes
1072int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1073{
1074        struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1075
1076        if (tp->flags & TP_FLAG_TRACE)
1077                kprobe_trace_func(kp, regs);
1078#ifdef CONFIG_PERF_EVENTS
1079        if (tp->flags & TP_FLAG_PROFILE)
1080                kprobe_perf_func(kp, regs);
1081#endif
1082        return 0;       /* We don't tweek kernel, so just return 0 */
1083}
1084
1085static __kprobes
1086int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1087{
1088        struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1089
1090        if (tp->flags & TP_FLAG_TRACE)
1091                kretprobe_trace_func(ri, regs);
1092#ifdef CONFIG_PERF_EVENTS
1093        if (tp->flags & TP_FLAG_PROFILE)
1094                kretprobe_perf_func(ri, regs);
1095#endif
1096        return 0;       /* We don't tweek kernel, so just return 0 */
1097}
1098
1099static struct trace_event_functions kretprobe_funcs = {
1100        .trace          = print_kretprobe_event
1101};
1102
1103static struct trace_event_functions kprobe_funcs = {
1104        .trace          = print_kprobe_event
1105};
1106
1107static int register_probe_event(struct trace_probe *tp)
1108{
1109        struct ftrace_event_call *call = &tp->call;
1110        int ret;
1111
1112        /* Initialize ftrace_event_call */
1113        INIT_LIST_HEAD(&call->class->fields);
1114        if (trace_probe_is_return(tp)) {
1115                call->event.funcs = &kretprobe_funcs;
1116                call->class->define_fields = kretprobe_event_define_fields;
1117        } else {
1118                call->event.funcs = &kprobe_funcs;
1119                call->class->define_fields = kprobe_event_define_fields;
1120        }
1121        if (set_print_fmt(tp) < 0)
1122                return -ENOMEM;
1123        ret = register_ftrace_event(&call->event);
1124        if (!ret) {
1125                kfree(call->print_fmt);
1126                return -ENODEV;
1127        }
1128        call->flags = 0;
1129        call->class->reg = kprobe_register;
1130        call->data = tp;
1131        ret = trace_add_event_call(call);
1132        if (ret) {
1133                pr_info("Failed to register kprobe event: %s\n", call->name);
1134                kfree(call->print_fmt);
1135                unregister_ftrace_event(&call->event);
1136        }
1137        return ret;
1138}
1139
1140static void unregister_probe_event(struct trace_probe *tp)
1141{
1142        /* tp->event is unregistered in trace_remove_event_call() */
1143        trace_remove_event_call(&tp->call);
1144        kfree(tp->call.print_fmt);
1145}
1146
1147/* Make a debugfs interface for controlling probe points */
1148static __init int init_kprobe_trace(void)
1149{
1150        struct dentry *d_tracer;
1151        struct dentry *entry;
1152
1153        if (register_module_notifier(&trace_probe_module_nb))
1154                return -EINVAL;
1155
1156        d_tracer = tracing_init_dentry();
1157        if (!d_tracer)
1158                return 0;
1159
1160        entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1161                                    NULL, &kprobe_events_ops);
1162
1163        /* Event list interface */
1164        if (!entry)
1165                pr_warning("Could not create debugfs "
1166                           "'kprobe_events' entry\n");
1167
1168        /* Profile interface */
1169        entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1170                                    NULL, &kprobe_profile_ops);
1171
1172        if (!entry)
1173                pr_warning("Could not create debugfs "
1174                           "'kprobe_profile' entry\n");
1175        return 0;
1176}
1177fs_initcall(init_kprobe_trace);
1178
1179
1180#ifdef CONFIG_FTRACE_STARTUP_TEST
1181
/*
 * Probe target for the startup self test: returns the sum of its six
 * arguments.
 *
 * The "__used" keeps gcc from removing the function symbol
 * from the kallsyms table.
 */
static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
					       int a4, int a5, int a6)
{
	int total = a1;

	total += a2;
	total += a3;
	total += a4;
	total += a5;
	total += a6;

	return total;
}
1191
1192static __init int kprobe_trace_self_tests_init(void)
1193{
1194        int ret, warn = 0;
1195        int (*target)(int, int, int, int, int, int);
1196        struct trace_probe *tp;
1197
1198        target = kprobe_trace_selftest_target;
1199
1200        pr_info("Testing kprobe tracing: ");
1201
1202        ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
1203                                  "$stack $stack0 +0($stack)",
1204                                  create_trace_probe);
1205        if (WARN_ON_ONCE(ret)) {
1206                pr_warning("error on probing function entry.\n");
1207                warn++;
1208        } else {
1209                /* Enable trace point */
1210                tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1211                if (WARN_ON_ONCE(tp == NULL)) {
1212                        pr_warning("error on getting new probe.\n");
1213                        warn++;
1214                } else
1215                        enable_trace_probe(tp, TP_FLAG_TRACE);
1216        }
1217
1218        ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
1219                                  "$retval", create_trace_probe);
1220        if (WARN_ON_ONCE(ret)) {
1221                pr_warning("error on probing function return.\n");
1222                warn++;
1223        } else {
1224                /* Enable trace point */
1225                tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1226                if (WARN_ON_ONCE(tp == NULL)) {
1227                        pr_warning("error on getting new probe.\n");
1228                        warn++;
1229                } else
1230                        enable_trace_probe(tp, TP_FLAG_TRACE);
1231        }
1232
1233        if (warn)
1234                goto end;
1235
1236        ret = target(1, 2, 3, 4, 5, 6);
1237
1238        /* Disable trace points before removing it */
1239        tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1240        if (WARN_ON_ONCE(tp == NULL)) {
1241                pr_warning("error on getting test probe.\n");
1242                warn++;
1243        } else
1244                disable_trace_probe(tp, TP_FLAG_TRACE);
1245
1246        tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1247        if (WARN_ON_ONCE(tp == NULL)) {
1248                pr_warning("error on getting 2nd test probe.\n");
1249                warn++;
1250        } else
1251                disable_trace_probe(tp, TP_FLAG_TRACE);
1252
1253        ret = traceprobe_command("-:testprobe", create_trace_probe);
1254        if (WARN_ON_ONCE(ret)) {
1255                pr_warning("error on deleting a probe.\n");
1256                warn++;
1257        }
1258
1259        ret = traceprobe_command("-:testprobe2", create_trace_probe);
1260        if (WARN_ON_ONCE(ret)) {
1261                pr_warning("error on deleting a probe.\n");
1262                warn++;
1263        }
1264
1265end:
1266        release_all_trace_probes();
1267        if (warn)
1268                pr_cont("NG: Some tests are failed. Please check them.\n");
1269        else
1270                pr_cont("OK\n");
1271        return 0;
1272}
1273
1274late_initcall(kprobe_trace_self_tests_init);
1275
1276#endif
1277
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.