linux/block/blktrace.c
/*
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/time.h>
#include <trace/block.h>
#include <asm/uaccess.h>

static unsigned int blktrace_seq __read_mostly = 1;

/* Global reference count of probes */
static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);

static int blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);

/*
 * Send out a notify message.
 */
static void trace_note(struct blk_trace *bt, pid_t pid, int action,
                       const void *data, size_t len)
{
        struct blk_io_trace *t;

        t = relay_reserve(bt->rchan, sizeof(*t) + len);
        if (t) {
                const int cpu = smp_processor_id();

                t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
                t->time = ktime_to_ns(ktime_get());
                t->device = bt->dev;
                t->action = action;
                t->pid = pid;
                t->cpu = cpu;
                t->pdu_len = len;
                memcpy((void *) t + sizeof(*t), data, len);
        }
}

/*
 * Send out a notify for this process, if we haven't done so since a trace
 * started
 */
static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
{
        tsk->btrace_seq = blktrace_seq;
        trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
}

static void trace_note_time(struct blk_trace *bt)
{
        struct timespec now;
        unsigned long flags;
        u32 words[2];

        getnstimeofday(&now);
        words[0] = now.tv_sec;
        words[1] = now.tv_nsec;

        local_irq_save(flags);
        trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
        local_irq_restore(flags);
}

void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
{
        int n;
        va_list args;
        unsigned long flags;
        char *buf;

        local_irq_save(flags);
        buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
        va_start(args, fmt);
        n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
        va_end(args);

        trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(__trace_note_message);
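
/*
 * Illustrative sketch (not part of the original file): a driver holding a
 * queue with tracing enabled can inject a free-form note into the trace
 * stream.  The blk_add_trace_msg() wrapper from blktrace_api.h checks
 * q->blk_trace before calling __trace_note_message(); the queue and the
 * message below are hypothetical:
 *
 *      blk_add_trace_msg(q, "my_driver: resetting port %d", port);
 */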

static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
                         pid_t pid)
{
        if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
                return 1;
        if (sector < bt->start_lba || sector > bt->end_lba)
                return 1;
        if (bt->pid && pid != bt->pid)
                return 1;

        return 0;
}

/*
 * Data direction bit lookup
 */
static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ),
                                         BLK_TC_ACT(BLK_TC_WRITE) };

/* The ilog2() calls fall out because they're constant */
#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
          (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
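
/*
 * Worked example: MASK_TC_BIT(rw, BARRIER) isolates the request's barrier
 * flag, which lives at bit position BIO_RW_BARRIER of rw, and shifts it
 * left by (ilog2(BLK_TC_BARRIER) + BLK_TC_SHIFT - BIO_RW_BARRIER), so the
 * bit lands exactly where BLK_TC_ACT(BLK_TC_BARRIER) places it in the
 * upper, action-category half of the trace action word.  Every term is a
 * compile-time constant, so each use reduces to one AND and one shift.
 */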

/*
 * The worker for the various blk_add_trace*() types. Fills out a
 * blk_io_trace structure and places it in a per-cpu subbuffer.
 */
static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
                     int rw, u32 what, int error, int pdu_len, void *pdu_data)
{
        struct task_struct *tsk = current;
        struct blk_io_trace *t;
        unsigned long flags;
        unsigned long *sequence;
        pid_t pid;
        int cpu;

        if (unlikely(bt->trace_state != Blktrace_running))
                return;

        what |= ddir_act[rw & WRITE];
        what |= MASK_TC_BIT(rw, BARRIER);
        what |= MASK_TC_BIT(rw, SYNCIO);
        what |= MASK_TC_BIT(rw, AHEAD);
        what |= MASK_TC_BIT(rw, META);
        what |= MASK_TC_BIT(rw, DISCARD);

        pid = tsk->pid;
        if (unlikely(act_log_check(bt, what, sector, pid)))
                return;

        /*
         * A word about the locking here - we disable interrupts to reserve
         * some space in the relay per-cpu buffer, to prevent an irq
         * from coming in and stepping on our toes.
         */
        local_irq_save(flags);

        if (unlikely(tsk->btrace_seq != blktrace_seq))
                trace_note_tsk(bt, tsk);

        t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
        if (t) {
                cpu = smp_processor_id();
                sequence = per_cpu_ptr(bt->sequence, cpu);

                t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
                t->sequence = ++(*sequence);
                t->time = ktime_to_ns(ktime_get());
                t->sector = sector;
                t->bytes = bytes;
                t->action = what;
                t->pid = pid;
                t->device = bt->dev;
                t->cpu = cpu;
                t->error = error;
                t->pdu_len = pdu_len;

                if (pdu_len)
                        memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
        }

        local_irq_restore(flags);
}

static struct dentry *blk_tree_root;
static DEFINE_MUTEX(blk_tree_mutex);

static void blk_trace_cleanup(struct blk_trace *bt)
{
        debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
        relay_close(bt->rchan);
        free_percpu(bt->sequence);
        free_percpu(bt->msg_data);
        kfree(bt);
        mutex_lock(&blk_probe_mutex);
        if (atomic_dec_and_test(&blk_probes_ref))
                blk_unregister_tracepoints();
        mutex_unlock(&blk_probe_mutex);
}

int blk_trace_remove(struct request_queue *q)
{
        struct blk_trace *bt;

        bt = xchg(&q->blk_trace, NULL);
        if (!bt)
                return -EINVAL;

        if (bt->trace_state == Blktrace_setup ||
            bt->trace_state == Blktrace_stopped)
                blk_trace_cleanup(bt);

        return 0;
}
EXPORT_SYMBOL_GPL(blk_trace_remove);

static int blk_dropped_open(struct inode *inode, struct file *filp)
{
        filp->private_data = inode->i_private;

        return 0;
}

static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
                                size_t count, loff_t *ppos)
{
        struct blk_trace *bt = filp->private_data;
        char buf[16];

        snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));

        return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
}

static const struct file_operations blk_dropped_fops = {
        .owner =        THIS_MODULE,
        .open =         blk_dropped_open,
        .read =         blk_dropped_read,
};

static int blk_msg_open(struct inode *inode, struct file *filp)
{
        filp->private_data = inode->i_private;

        return 0;
}

static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
                                size_t count, loff_t *ppos)
{
        char *msg;
        struct blk_trace *bt;

        if (count > BLK_TN_MAX_MSG)
                return -EINVAL;

        msg = kmalloc(count + 1, GFP_KERNEL);
        if (msg == NULL)
                return -ENOMEM;

        if (copy_from_user(msg, buffer, count)) {
                kfree(msg);
                return -EFAULT;
        }

        /* user space is not obliged to send a terminator, add one */
        msg[count] = '\0';

        bt = filp->private_data;
        __trace_note_message(bt, "%s", msg);
        kfree(msg);

        return count;
}

static const struct file_operations blk_msg_fops = {
        .owner =        THIS_MODULE,
        .open =         blk_msg_open,
        .write =        blk_msg_write,
};
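
/*
 * Illustrative use from user space (sketch; assumes debugfs is mounted at
 * /sys/kernel/debug and a trace was set up for a device named "sda"):
 *
 *      # cat /sys/kernel/debug/block/sda/dropped
 *      0
 *      # echo "hello from user space" > /sys/kernel/debug/block/sda/msg
 *
 * "dropped" reports how many events were lost to full relay sub-buffers,
 * "msg" feeds a BLK_TN_MESSAGE note into the trace via blk_msg_write().
 */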

/*
 * Keep track of how many times we encountered a full subbuffer, to aid
 * the user space app in telling how many lost events there were.
 */
static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
                                     void *prev_subbuf, size_t prev_padding)
{
        struct blk_trace *bt;

        if (!relay_buf_full(buf))
                return 1;

        bt = buf->chan->private_data;
        atomic_inc(&bt->dropped);
        return 0;
}

static int blk_remove_buf_file_callback(struct dentry *dentry)
{
        struct dentry *parent = dentry->d_parent;
        debugfs_remove(dentry);

        /*
         * this will fail for all but the last file, but that is ok. what we
         * care about is the top level buts->name directory going away, when
         * the last trace file is gone. Then we don't have to rmdir() that
         * manually on trace stop, so it nicely solves the issue with
         * force killing of running traces.
         */

        debugfs_remove(parent);
        return 0;
}

static struct dentry *blk_create_buf_file_callback(const char *filename,
                                                   struct dentry *parent,
                                                   int mode,
                                                   struct rchan_buf *buf,
                                                   int *is_global)
{
        return debugfs_create_file(filename, mode, parent, buf,
                                        &relay_file_operations);
}

static struct rchan_callbacks blk_relay_callbacks = {
        .subbuf_start           = blk_subbuf_start_callback,
        .create_buf_file        = blk_create_buf_file_callback,
        .remove_buf_file        = blk_remove_buf_file_callback,
};

/*
 * Setup everything required to start tracing
 */
int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                        struct blk_user_trace_setup *buts)
{
        struct blk_trace *old_bt, *bt = NULL;
        struct dentry *dir = NULL;
        int ret, i;

        if (!buts->buf_size || !buts->buf_nr)
                return -EINVAL;

        strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
        buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';

        /*
         * some device names have larger paths - convert the slashes
         * to underscores for this to work as expected
         */
        for (i = 0; i < strlen(buts->name); i++)
                if (buts->name[i] == '/')
                        buts->name[i] = '_';

        ret = -ENOMEM;
        bt = kzalloc(sizeof(*bt), GFP_KERNEL);
        if (!bt)
                goto err;

        bt->sequence = alloc_percpu(unsigned long);
        if (!bt->sequence)
                goto err;

        bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
        if (!bt->msg_data)
                goto err;

        ret = -ENOENT;

        if (!blk_tree_root) {
                blk_tree_root = debugfs_create_dir("block", NULL);
                if (!blk_tree_root) {
                        /* don't leak bt and its per-cpu buffers */
                        ret = -ENOMEM;
                        goto err;
                }
        }

        dir = debugfs_create_dir(buts->name, blk_tree_root);

        if (!dir)
                goto err;

        bt->dir = dir;
        bt->dev = dev;
        atomic_set(&bt->dropped, 0);

        ret = -EIO;
        bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
                                               &blk_dropped_fops);
        if (!bt->dropped_file)
                goto err;

        bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
        if (!bt->msg_file)
                goto err;

        bt->rchan = relay_open("trace", dir, buts->buf_size,
                                buts->buf_nr, &blk_relay_callbacks, bt);
        if (!bt->rchan)
                goto err;

        bt->act_mask = buts->act_mask;
        if (!bt->act_mask)
                bt->act_mask = (u16) -1;

        bt->start_lba = buts->start_lba;
        bt->end_lba = buts->end_lba;
        if (!bt->end_lba)
                bt->end_lba = -1ULL;

        bt->pid = buts->pid;
        bt->trace_state = Blktrace_setup;

        mutex_lock(&blk_probe_mutex);
        if (atomic_add_return(1, &blk_probes_ref) == 1) {
                ret = blk_register_tracepoints();
                if (ret)
                        goto probe_err;
        }
        mutex_unlock(&blk_probe_mutex);

        ret = -EBUSY;
        old_bt = xchg(&q->blk_trace, bt);
        if (old_bt) {
                (void) xchg(&q->blk_trace, old_bt);
                goto err;
        }

        return 0;
probe_err:
        atomic_dec(&blk_probes_ref);
        mutex_unlock(&blk_probe_mutex);
err:
        if (bt) {
                if (bt->msg_file)
                        debugfs_remove(bt->msg_file);
                if (bt->dropped_file)
                        debugfs_remove(bt->dropped_file);
                free_percpu(bt->sequence);
                free_percpu(bt->msg_data);
                if (bt->rchan)
                        relay_close(bt->rchan);
                kfree(bt);
        }
        return ret;
}

int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
                    char __user *arg)
{
        struct blk_user_trace_setup buts;
        int ret;

        ret = copy_from_user(&buts, arg, sizeof(buts));
        if (ret)
                return -EFAULT;

        ret = do_blk_trace_setup(q, name, dev, &buts);
        if (ret)
                return ret;

        if (copy_to_user(arg, &buts, sizeof(buts)))
                return -EFAULT;

        return 0;
}
EXPORT_SYMBOL_GPL(blk_trace_setup);

int blk_trace_startstop(struct request_queue *q, int start)
{
        struct blk_trace *bt;
        int ret;

        bt = q->blk_trace;
        if (bt == NULL)
                return -EINVAL;

        /*
         * For starting a trace, we can transition from a setup or stopped
         * trace. For stopping a trace, the state must be running
         */
        ret = -EINVAL;
        if (start) {
                if (bt->trace_state == Blktrace_setup ||
                    bt->trace_state == Blktrace_stopped) {
                        blktrace_seq++;
                        smp_mb();
                        bt->trace_state = Blktrace_running;

                        trace_note_time(bt);
                        ret = 0;
                }
        } else {
                if (bt->trace_state == Blktrace_running) {
                        bt->trace_state = Blktrace_stopped;
                        relay_flush(bt->rchan);
                        ret = 0;
                }
        }

        return ret;
}
EXPORT_SYMBOL_GPL(blk_trace_startstop);

/**
 * blk_trace_ioctl: - handle the ioctls associated with tracing
 * @bdev:       the block device
 * @cmd:        the ioctl cmd
 * @arg:        the argument data, if any
 *
 **/
int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
{
        struct request_queue *q;
        int ret, start = 0;
        char b[BDEVNAME_SIZE];

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        mutex_lock(&bdev->bd_mutex);

        switch (cmd) {
        case BLKTRACESETUP:
                bdevname(bdev, b);
                ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
                break;
        case BLKTRACESTART:
                start = 1;
                /* fall through */
        case BLKTRACESTOP:
                ret = blk_trace_startstop(q, start);
                break;
        case BLKTRACETEARDOWN:
                ret = blk_trace_remove(q);
                break;
        default:
                ret = -ENOTTY;
                break;
        }

        mutex_unlock(&bdev->bd_mutex);
        return ret;
}
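
/*
 * Illustrative user-space sketch (hypothetical snippet, not part of this
 * file): a tool such as blktrace drives the ioctls above roughly like
 * this.  BLKTRACESETUP writes the chosen trace name back into buts.name,
 * and an act_mask of 0 is widened to "everything" by do_blk_trace_setup();
 * the buffer sizes are arbitrary example values:
 *
 *      struct blk_user_trace_setup buts = {
 *              .buf_size = 512 * 1024,
 *              .buf_nr   = 4,
 *              .act_mask = 0,
 *      };
 *      int fd = open("/dev/sda", O_RDONLY);
 *
 *      ioctl(fd, BLKTRACESETUP, &buts);
 *      ioctl(fd, BLKTRACESTART);
 *      ... consume the per-cpu relay files under debugfs ...
 *      ioctl(fd, BLKTRACESTOP);
 *      ioctl(fd, BLKTRACETEARDOWN);
 */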

/**
 * blk_trace_shutdown: - stop and cleanup trace structures
 * @q:    the request queue associated with the device
 *
 **/
void blk_trace_shutdown(struct request_queue *q)
{
        if (q->blk_trace) {
                blk_trace_startstop(q, 0);
                blk_trace_remove(q);
        }
}

/*
 * blktrace probes
 */

/**
 * blk_add_trace_rq - Add a trace for a request oriented action
 * @q:          queue the io is for
 * @rq:         the source request
 * @what:       the action
 *
 * Description:
 *     Records an action against a request. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
                                    u32 what)
{
        struct blk_trace *bt = q->blk_trace;
        int rw = rq->cmd_flags & 0x03;

        if (likely(!bt))
                return;

        if (blk_discard_rq(rq))
                rw |= (1 << BIO_RW_DISCARD);

        if (blk_pc_request(rq)) {
                what |= BLK_TC_ACT(BLK_TC_PC);
                __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
                                sizeof(rq->cmd), rq->cmd);
        } else {
                what |= BLK_TC_ACT(BLK_TC_FS);
                __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
                                rw, what, rq->errors, 0, NULL);
        }
}

static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_ABORT);
}

static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_INSERT);
}

static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}

static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
}

static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
}

/**
 * blk_add_trace_bio - Add a trace for a bio oriented action
 * @q:          queue the io is for
 * @bio:        the source bio
 * @what:       the action
 *
 * Description:
 *     Records an action against a bio. Will log the bio offset + size.
 *
 **/
static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
                                     u32 what)
{
        struct blk_trace *bt = q->blk_trace;

        if (likely(!bt))
                return;

        __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
                        !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
}

static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
}

static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
}

static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
}

static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
}

static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
}

static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
{
        if (bio)
                blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
        else {
                struct blk_trace *bt = q->blk_trace;

                if (bt)
                        __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
        }
}

static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
{
        if (bio)
                blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
        else {
                struct blk_trace *bt = q->blk_trace;

                if (bt)
                        __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
        }
}

static void blk_add_trace_plug(struct request_queue *q)
{
        struct blk_trace *bt = q->blk_trace;

        if (bt)
                __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
}

static void blk_add_trace_unplug_io(struct request_queue *q)
{
        struct blk_trace *bt = q->blk_trace;

        if (bt) {
                unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
                __be64 rpdu = cpu_to_be64(pdu);

                __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
                                sizeof(rpdu), &rpdu);
        }
}

static void blk_add_trace_unplug_timer(struct request_queue *q)
{
        struct blk_trace *bt = q->blk_trace;

        if (bt) {
                unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
                __be64 rpdu = cpu_to_be64(pdu);

                __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
                                sizeof(rpdu), &rpdu);
        }
}

static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
                                unsigned int pdu)
{
        struct blk_trace *bt = q->blk_trace;

        if (bt) {
                __be64 rpdu = cpu_to_be64(pdu);

                __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
                                BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
                                sizeof(rpdu), &rpdu);
        }
}

/**
 * blk_add_trace_remap - Add a trace for a remap operation
 * @q:          queue the io is for
 * @bio:        the source bio
 * @dev:        target device
 * @from:       source sector
 * @to:         target sector
 *
 * Description:
 *     Device mapper or raid target sometimes need to split a bio because
 *     it spans a stripe (or similar). Add a trace for that action.
 *
 **/
static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
                                       dev_t dev, sector_t from, sector_t to)
{
        struct blk_trace *bt = q->blk_trace;
        struct blk_io_trace_remap r;

        if (likely(!bt))
                return;

        r.device = cpu_to_be32(dev);
        r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
        r.sector = cpu_to_be64(to);

        __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
                        !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}

/**
 * blk_add_driver_data - Add binary message with driver-specific data
 * @q:          queue the io is for
 * @rq:         io request
 * @data:       driver-specific data
 * @len:        length of driver-specific data
 *
 * Description:
 *     Some drivers might want to write driver-specific data per request.
 *
 **/
void blk_add_driver_data(struct request_queue *q,
                         struct request *rq,
                         void *data, size_t len)
{
        struct blk_trace *bt = q->blk_trace;

        if (likely(!bt))
                return;

        if (blk_pc_request(rq))
                __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
                                rq->errors, len, data);
        else
                __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
                                0, BLK_TA_DRV_DATA, rq->errors, len, data);
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
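
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a low-level driver that wants its own binary payload interleaved with
 * the block trace can call the export above once per request; user space
 * then decodes the payload from the resulting BLK_TA_DRV_DATA events.
 * The struct below is made up for the example:
 *
 *      struct my_drv_completion_info info = { ... };
 *
 *      blk_add_driver_data(q, rq, &info, sizeof(info));
 */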

static int blk_register_tracepoints(void)
{
        int ret;

        ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
        WARN_ON(ret);
        ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
        WARN_ON(ret);
        ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
        WARN_ON(ret);
        ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
        WARN_ON(ret);
        ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
        WARN_ON(ret);
        ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
        WARN_ON(ret);
        ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
        WARN_ON(ret);
        ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
        WARN_ON(ret);
        ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
        WARN_ON(ret);
        ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
        WARN_ON(ret);
        ret = register_trace_block_getrq(blk_add_trace_getrq);
        WARN_ON(ret);
        ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
        WARN_ON(ret);
        ret = register_trace_block_plug(blk_add_trace_plug);
        WARN_ON(ret);
        ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
        WARN_ON(ret);
        ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
        WARN_ON(ret);
        ret = register_trace_block_split(blk_add_trace_split);
        WARN_ON(ret);
        ret = register_trace_block_remap(blk_add_trace_remap);
        WARN_ON(ret);
        return 0;
}

static void blk_unregister_tracepoints(void)
{
        unregister_trace_block_remap(blk_add_trace_remap);
        unregister_trace_block_split(blk_add_trace_split);
        unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
        unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
        unregister_trace_block_plug(blk_add_trace_plug);
        unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
        unregister_trace_block_getrq(blk_add_trace_getrq);
        unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
        unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
        unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
        unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
        unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
        unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
        unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
        unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
        unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
        unregister_trace_block_rq_abort(blk_add_trace_rq_abort);

        tracepoint_synchronize_unregister();
}