linux/drivers/block/xen-blkfront.c History
<<
>>
Prefs
   1/*
   2 * blkfront.c
   3 *
   4 * XenLinux virtual block device driver.
   5 *
   6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   8 * Copyright (c) 2004, Christian Limpach
   9 * Copyright (c) 2004, Andrew Warfield
  10 * Copyright (c) 2005, Christopher Clark
  11 * Copyright (c) 2005, XenSource Ltd
  12 *
  13 * This program is free software; you can redistribute it and/or
  14 * modify it under the terms of the GNU General Public License version 2
  15 * as published by the Free Software Foundation; or, when distributed
  16 * separately from the Linux kernel or incorporated into other
  17 * software packages, subject to the following license:
  18 *
  19 * Permission is hereby granted, free of charge, to any person obtaining a copy
  20 * of this source file (the "Software"), to deal in the Software without
  21 * restriction, including without limitation the rights to use, copy, modify,
  22 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
  23 * and to permit persons to whom the Software is furnished to do so, subject to
  24 * the following conditions:
  25 *
  26 * The above copyright notice and this permission notice shall be included in
  27 * all copies or substantial portions of the Software.
  28 *
  29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  35 * IN THE SOFTWARE.
  36 */
  37
  38#include <linux/interrupt.h>
  39#include <linux/blkdev.h>
  40#include <linux/hdreg.h>
  41#include <linux/cdrom.h>
  42#include <linux/module.h>
  43#include <linux/scatterlist.h>
  44
  45#include <xen/xen.h>
  46#include <xen/xenbus.h>
  47#include <xen/grant_table.h>
  48#include <xen/events.h>
  49#include <xen/page.h>
  50
  51#include <xen/interface/grant_table.h>
  52#include <xen/interface/io/blkif.h>
  53#include <xen/interface/io/protocols.h>
  54
  55#include <asm/xen/hypervisor.h>
  56
  57enum blkif_state {
  58        BLKIF_STATE_DISCONNECTED,
  59        BLKIF_STATE_CONNECTED,
  60        BLKIF_STATE_SUSPENDED,
  61};
  62
  63struct blk_shadow {
  64        struct blkif_request req;
  65        unsigned long request;
  66        unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  67};
  68
  69static const struct block_device_operations xlvbd_block_fops;
  70
  71#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
  72
  73/*
  74 * We have one of these per vbd, whether ide, scsi or 'other'.  They
  75 * hang in private_data off the gendisk structure. We may end up
  76 * putting all kinds of interesting stuff here :-)
  77 */
  78struct blkfront_info
  79{
  80        struct xenbus_device *xbdev;
  81        struct gendisk *gd;
  82        int vdevice;
  83        blkif_vdev_t handle;
  84        enum blkif_state connected;
  85        int ring_ref;
  86        struct blkif_front_ring ring;
  87        struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
  88        unsigned int evtchn, irq;
  89        struct request_queue *rq;
  90        struct work_struct work;
  91        struct gnttab_free_callback callback;
  92        struct blk_shadow shadow[BLK_RING_SIZE];
  93        unsigned long shadow_free;
  94        int feature_barrier;
  95        int is_ready;
  96
  97        /**
  98         * The number of people holding this device open.  We won't allow a
  99         * hot-unplug unless this is 0.
 100         */
 101        int users;
 102};
 103
 104static DEFINE_SPINLOCK(blkif_io_lock);
 105
 106#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
 107        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 108#define GRANT_INVALID_REF       0
 109
 110#define PARTS_PER_DISK          16
 111#define PARTS_PER_EXT_DISK      256
 112
 113#define BLKIF_MAJOR(dev) ((dev)>>8)
 114#define BLKIF_MINOR(dev) ((dev) & 0xff)
 115
 116#define EXT_SHIFT 28
 117#define EXTENDED (1<<EXT_SHIFT)
 118#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
 119#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
 120
 121#define DEV_NAME        "xvd"   /* name in /dev */
 122
 123static int get_id_from_freelist(struct blkfront_info *info)
 124{
 125        unsigned long free = info->shadow_free;
 126        BUG_ON(free >= BLK_RING_SIZE);
 127        info->shadow_free = info->shadow[free].req.id;
 128        info->shadow[free].req.id = 0x0fffffee; /* debug */
 129        return free;
 130}
 131
 132static void add_id_to_freelist(struct blkfront_info *info,
 133                               unsigned long id)
 134{
 135        info->shadow[id].req.id  = info->shadow_free;
 136        info->shadow[id].request = 0;
 137        info->shadow_free = id;
 138}
 139
 140static void blkif_restart_queue_callback(void *arg)
 141{
 142        struct blkfront_info *info = (struct blkfront_info *)arg;
 143        schedule_work(&info->work);
 144}
 145
 146static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
 147{
 148        /* We don't have real geometry info, but let's at least return
 149           values consistent with the size of the device */
 150        sector_t nsect = get_capacity(bd->bd_disk);
 151        sector_t cylinders = nsect;
 152
 153        hg->heads = 0xff;
 154        hg->sectors = 0x3f;
 155        sector_div(cylinders, hg->heads * hg->sectors);
 156        hg->cylinders = cylinders;
 157        if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
 158                hg->cylinders = 0xffff;
 159        return 0;
 160}
 161
 162static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
 163                       unsigned command, unsigned long argument)
 164{
 165        struct blkfront_info *info = bdev->bd_disk->private_data;
 166        int i;
 167
 168        dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
 169                command, (long)argument);
 170
 171        switch (command) {
 172        case CDROMMULTISESSION:
 173                dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
 174                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
 175                        if (put_user(0, (char __user *)(argument + i)))
 176                                return -EFAULT;
 177                return 0;
 178
 179        case CDROM_GET_CAPABILITY: {
 180                struct gendisk *gd = info->gd;
 181                if (gd->flags & GENHD_FL_CD)
 182                        return 0;
 183                return -EINVAL;
 184        }
 185
 186        default:
 187                /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
 188                  command);*/
 189                return -EINVAL; /* same return as native Linux */
 190        }
 191
 192        return 0;
 193}
 194
 195/*
 196 * blkif_queue_request
 197 *
 198 * request block io
 199 *
 200 * id: for guest use only.
 201 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 202 * buffer: buffer to read/write into. this should be a
 203 *   virtual address in the guest os.
 204 */
 205static int blkif_queue_request(struct request *req)
 206{
 207        struct blkfront_info *info = req->rq_disk->private_data;
 208        unsigned long buffer_mfn;
 209        struct blkif_request *ring_req;
 210        unsigned long id;
 211        unsigned int fsect, lsect;
 212        int i, ref;
 213        grant_ref_t gref_head;
 214        struct scatterlist *sg;
 215
 216        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 217                return 1;
 218
 219        if (gnttab_alloc_grant_references(
 220                BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
 221                gnttab_request_free_callback(
 222                        &info->callback,
 223                        blkif_restart_queue_callback,
 224                        info,
 225                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
 226                return 1;
 227        }
 228
 229        /* Fill out a communications ring structure. */
 230        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 231        id = get_id_from_freelist(info);
 232        info->shadow[id].request = (unsigned long)req;
 233
 234        ring_req->id = id;
 235        ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
 236        ring_req->handle = info->handle;
 237
 238        ring_req->operation = rq_data_dir(req) ?
 239                BLKIF_OP_WRITE : BLKIF_OP_READ;
 240        if (blk_barrier_rq(req))
 241                ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 242
 243        ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
 244        BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
 245
 246        for_each_sg(info->sg, sg, ring_req->nr_segments, i) {
 247                buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
 248                fsect = sg->offset >> 9;
 249                lsect = fsect + (sg->length >> 9) - 1;
 250                /* install a grant reference. */
 251                ref = gnttab_claim_grant_reference(&gref_head);
 252                BUG_ON(ref == -ENOSPC);
 253
 254                gnttab_grant_foreign_access_ref(
 255                                ref,
 256                                info->xbdev->otherend_id,
 257                                buffer_mfn,
 258                                rq_data_dir(req) );
 259
 260                info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
 261                ring_req->seg[i] =
 262                                (struct blkif_request_segment) {
 263                                        .gref       = ref,
 264                                        .first_sect = fsect,
 265                                        .last_sect  = lsect };
 266        }
 267
 268        info->ring.req_prod_pvt++;
 269
 270        /* Keep a private copy so we can reissue requests when recovering. */
 271        info->shadow[id].req = *ring_req;
 272
 273        gnttab_free_grant_references(gref_head);
 274
 275        return 0;
 276}
 277
 278
 279static inline void flush_requests(struct blkfront_info *info)
 280{
 281        int notify;
 282
 283        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
 284
 285        if (notify)
 286                notify_remote_via_irq(info->irq);
 287}
 288
 289/*
 290 * do_blkif_request
 291 *  read a block; request is in a request queue
 292 */
 293static void do_blkif_request(struct request_queue *rq)
 294{
 295        struct blkfront_info *info = NULL;
 296        struct request *req;
 297        int queued;
 298
 299        pr_debug("Entered do_blkif_request\n");
 300
 301        queued = 0;
 302
 303        while ((req = blk_peek_request(rq)) != NULL) {
 304                info = req->rq_disk->private_data;
 305
 306                if (RING_FULL(&info->ring))
 307                        goto wait;
 308
 309                blk_start_request(req);
 310
 311                if (!blk_fs_request(req)) {
 312                        __blk_end_request_all(req, -EIO);
 313                        continue;
 314                }
 315
 316                pr_debug("do_blk_req %p: cmd %p, sec %lx, "
 317                         "(%u/%u) buffer:%p [%s]\n",
 318                         req, req->cmd, (unsigned long)blk_rq_pos(req),
 319                         blk_rq_cur_sectors(req), blk_rq_sectors(req),
 320                         req->buffer, rq_data_dir(req) ? "write" : "read");
 321
 322                if (blkif_queue_request(req)) {
 323                        blk_requeue_request(rq, req);
 324wait:
 325                        /* Avoid pointless unplugs. */
 326                        blk_stop_queue(rq);
 327                        break;
 328                }
 329
 330                queued++;
 331        }
 332
 333        if (queued != 0)
 334                flush_requests(info);
 335}
 336
 337static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 338{
 339        struct request_queue *rq;
 340
 341        rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
 342        if (rq == NULL)
 343                return -1;
 344
 345        queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 346
 347        /* Hard sector size and max sectors impersonate the equiv. hardware. */
 348        blk_queue_logical_block_size(rq, sector_size);
 349        blk_queue_max_sectors(rq, 512);
 350
 351        /* Each segment in a request is up to an aligned page in size. */
 352        blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
 353        blk_queue_max_segment_size(rq, PAGE_SIZE);
 354
 355        /* Ensure a merged request will fit in a single I/O ring slot. */
 356        blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 357        blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 358
 359        /* Make sure buffer addresses are sector-aligned. */
 360        blk_queue_dma_alignment(rq, 511);
 361
 362        /* Make sure we don't use bounce buffers. */
 363        blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
 364
 365        gd->queue = rq;
 366
 367        return 0;
 368}
 369
 370
 371static int xlvbd_barrier(struct blkfront_info *info)
 372{
 373        int err;
 374
 375        err = blk_queue_ordered(info->rq,
 376                                info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
 377                                NULL);
 378
 379        if (err)
 380                return err;
 381
 382        printk(KERN_INFO "blkfront: %s: barriers %s\n",
 383               info->gd->disk_name,
 384               info->feature_barrier ? "enabled" : "disabled");
 385        return 0;
 386}
 387
 388
 389static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 390                               struct blkfront_info *info,
 391                               u16 vdisk_info, u16 sector_size)
 392{
 393        struct gendisk *gd;
 394        int nr_minors = 1;
 395        int err = -ENODEV;
 396        unsigned int offset;
 397        int minor;
 398        int nr_parts;
 399
 400        BUG_ON(info->gd != NULL);
 401        BUG_ON(info->rq != NULL);
 402
 403        if ((info->vdevice>>EXT_SHIFT) > 1) {
 404                /* this is above the extended range; something is wrong */
 405                printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
 406                return -ENODEV;
 407        }
 408
 409        if (!VDEV_IS_EXTENDED(info->vdevice)) {
 410                minor = BLKIF_MINOR(info->vdevice);
 411                nr_parts = PARTS_PER_DISK;
 412        } else {
 413                minor = BLKIF_MINOR_EXT(info->vdevice);
 414                nr_parts = PARTS_PER_EXT_DISK;
 415        }
 416
 417        if ((minor % nr_parts) == 0)
 418                nr_minors = nr_parts;
 419
 420        gd = alloc_disk(nr_minors);
 421        if (gd == NULL)
 422                goto out;
 423
 424        offset = minor / nr_parts;
 425
 426        if (nr_minors > 1) {
 427                if (offset < 26)
 428                        sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
 429                else
 430                        sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
 431                                'a' + ((offset / 26)-1), 'a' + (offset % 26));
 432        } else {
 433                if (offset < 26)
 434                        sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
 435                                'a' + offset,
 436                                minor & (nr_parts - 1));
 437                else
 438                        sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
 439                                'a' + ((offset / 26) - 1),
 440                                'a' + (offset % 26),
 441                                minor & (nr_parts - 1));
 442        }
 443
 444        gd->major = XENVBD_MAJOR;
 445        gd->first_minor = minor;
 446        gd->fops = &xlvbd_block_fops;
 447        gd->private_data = info;
 448        gd->driverfs_dev = &(info->xbdev->dev);
 449        set_capacity(gd, capacity);
 450
 451        if (xlvbd_init_blk_queue(gd, sector_size)) {
 452                del_gendisk(gd);
 453                goto out;
 454        }
 455
 456        info->rq = gd->queue;
 457        info->gd = gd;
 458
 459        if (info->feature_barrier)
 460                xlvbd_barrier(info);
 461
 462        if (vdisk_info & VDISK_READONLY)
 463                set_disk_ro(gd, 1);
 464
 465        if (vdisk_info & VDISK_REMOVABLE)
 466                gd->flags |= GENHD_FL_REMOVABLE;
 467
 468        if (vdisk_info & VDISK_CDROM)
 469                gd->flags |= GENHD_FL_CD;
 470
 471        return 0;
 472
 473 out:
 474        return err;
 475}
 476
 477static void kick_pending_request_queues(struct blkfront_info *info)
 478{
 479        if (!RING_FULL(&info->ring)) {
 480                /* Re-enable calldowns. */
 481                blk_start_queue(info->rq);
 482                /* Kick things off immediately. */
 483                do_blkif_request(info->rq);
 484        }
 485}
 486
 487static void blkif_restart_queue(struct work_struct *work)
 488{
 489        struct blkfront_info *info = container_of(work, struct blkfront_info, work);
 490
 491        spin_lock_irq(&blkif_io_lock);
 492        if (info->connected == BLKIF_STATE_CONNECTED)
 493                kick_pending_request_queues(info);
 494        spin_unlock_irq(&blkif_io_lock);
 495}
 496
 497static void blkif_free(struct blkfront_info *info, int suspend)
 498{
 499        /* Prevent new requests being issued until we fix things up. */
 500        spin_lock_irq(&blkif_io_lock);
 501        info->connected = suspend ?
 502                BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
 503        /* No more blkif_request(). */
 504        if (info->rq)
 505                blk_stop_queue(info->rq);
 506        /* No more gnttab callback work. */
 507        gnttab_cancel_free_callback(&info->callback);
 508        spin_unlock_irq(&blkif_io_lock);
 509
 510        /* Flush gnttab callback work. Must be done with no locks held. */
 511        flush_scheduled_work();
 512
 513        /* Free resources associated with old device channel. */
 514        if (info->ring_ref != GRANT_INVALID_REF) {
 515                gnttab_end_foreign_access(info->ring_ref, 0,
 516                                          (unsigned long)info->ring.sring);
 517                info->ring_ref = GRANT_INVALID_REF;
 518                info->ring.sring = NULL;
 519        }
 520        if (info->irq)
 521                unbind_from_irqhandler(info->irq, info);
 522        info->evtchn = info->irq = 0;
 523
 524}
 525
 526static void blkif_completion(struct blk_shadow *s)
 527{
 528        int i;
 529        for (i = 0; i < s->req.nr_segments; i++)
 530                gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
 531}
 532
 533static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 534{
 535        struct request *req;
 536        struct blkif_response *bret;
 537        RING_IDX i, rp;
 538        unsigned long flags;
 539        struct blkfront_info *info = (struct blkfront_info *)dev_id;
 540        int error;
 541
 542        spin_lock_irqsave(&blkif_io_lock, flags);
 543
 544        if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
 545                spin_unlock_irqrestore(&blkif_io_lock, flags);
 546                return IRQ_HANDLED;
 547        }
 548
 549 again:
 550        rp = info->ring.sring->rsp_prod;
 551        rmb(); /* Ensure we see queued responses up to 'rp'. */
 552
 553        for (i = info->ring.rsp_cons; i != rp; i++) {
 554                unsigned long id;
 555
 556                bret = RING_GET_RESPONSE(&info->ring, i);
 557                id   = bret->id;
 558                req  = (struct request *)info->shadow[id].request;
 559
 560                blkif_completion(&info->shadow[id]);
 561
 562                add_id_to_freelist(info, id);
 563
 564                error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
 565                switch (bret->operation) {
 566                case BLKIF_OP_WRITE_BARRIER:
 567                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
 568                                printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
 569                                       info->gd->disk_name);
 570                                error = -EOPNOTSUPP;
 571                                info->feature_barrier = 0;
 572                                xlvbd_barrier(info);
 573                        }
 574                        /* fall through */
 575                case BLKIF_OP_READ:
 576                case BLKIF_OP_WRITE:
 577                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
 578                                dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
 579                                        "request: %x\n", bret->status);
 580
 581                        __blk_end_request_all(req, error);
 582                        break;
 583                default:
 584                        BUG();
 585                }
 586        }
 587
 588        info->ring.rsp_cons = i;
 589
 590        if (i != info->ring.req_prod_pvt) {
 591                int more_to_do;
 592                RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
 593                if (more_to_do)
 594                        goto again;
 595        } else
 596                info->ring.sring->rsp_event = i + 1;
 597
 598        kick_pending_request_queues(info);
 599
 600        spin_unlock_irqrestore(&blkif_io_lock, flags);
 601
 602        return IRQ_HANDLED;
 603}
 604
 605
 606static int setup_blkring(struct xenbus_device *dev,
 607                         struct blkfront_info *info)
 608{
 609        struct blkif_sring *sring;
 610        int err;
 611
 612        info->ring_ref = GRANT_INVALID_REF;
 613
 614        sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
 615        if (!sring) {
 616                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
 617                return -ENOMEM;
 618        }
 619        SHARED_RING_INIT(sring);
 620        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 621
 622        sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 623
 624        err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
 625        if (err < 0) {
 626                free_page((unsigned long)sring);
 627                info->ring.sring = NULL;
 628                goto fail;
 629        }
 630        info->ring_ref = err;
 631
 632        err = xenbus_alloc_evtchn(dev, &info->evtchn);
 633        if (err)
 634                goto fail;
 635
 636        err = bind_evtchn_to_irqhandler(info->evtchn,
 637                                        blkif_interrupt,
 638                                        IRQF_SAMPLE_RANDOM, "blkif", info);
 639        if (err <= 0) {
 640                xenbus_dev_fatal(dev, err,
 641                                 "bind_evtchn_to_irqhandler failed");
 642                goto fail;
 643        }
 644        info->irq = err;
 645
 646        return 0;
 647fail:
 648        blkif_free(info, 0);
 649        return err;
 650}
 651
 652
 653/* Common code used when first setting up, and when resuming. */
 654static int talk_to_backend(struct xenbus_device *dev,
 655                           struct blkfront_info *info)
 656{
 657        const char *message = NULL;
 658        struct xenbus_transaction xbt;
 659        int err;
 660
 661        /* Create shared ring, alloc event channel. */
 662        err = setup_blkring(dev, info);
 663        if (err)
 664                goto out;
 665
 666again:
 667        err = xenbus_transaction_start(&xbt);
 668        if (err) {
 669                xenbus_dev_fatal(dev, err, "starting transaction");
 670                goto destroy_blkring;
 671        }
 672
 673        err = xenbus_printf(xbt, dev->nodename,
 674                            "ring-ref", "%u", info->ring_ref);
 675        if (err) {
 676                message = "writing ring-ref";
 677                goto abort_transaction;
 678        }
 679        err = xenbus_printf(xbt, dev->nodename,
 680                            "event-channel", "%u", info->evtchn);
 681        if (err) {
 682                message = "writing event-channel";
 683                goto abort_transaction;
 684        }
 685        err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
 686                            XEN_IO_PROTO_ABI_NATIVE);
 687        if (err) {
 688                message = "writing protocol";
 689                goto abort_transaction;
 690        }
 691
 692        err = xenbus_transaction_end(xbt, 0);
 693        if (err) {
 694                if (err == -EAGAIN)
 695                        goto again;
 696                xenbus_dev_fatal(dev, err, "completing transaction");
 697                goto destroy_blkring;
 698        }
 699
 700        xenbus_switch_state(dev, XenbusStateInitialised);
 701
 702        return 0;
 703
 704 abort_transaction:
 705        xenbus_transaction_end(xbt, 1);
 706        if (message)
 707                xenbus_dev_fatal(dev, err, "%s", message);
 708 destroy_blkring:
 709        blkif_free(info, 0);
 710 out:
 711        return err;
 712}
 713
 714
 715/**
 716 * Entry point to this code when a new device is created.  Allocate the basic
 717 * structures and the ring buffer for communication with the backend, and
 718 * inform the backend of the appropriate details for those.  Switch to
 719 * Initialised state.
 720 */
 721static int blkfront_probe(struct xenbus_device *dev,
 722                          const struct xenbus_device_id *id)
 723{
 724        int err, vdevice, i;
 725        struct blkfront_info *info;
 726
 727        /* FIXME: Use dynamic device id if this is not set. */
 728        err = xenbus_scanf(XBT_NIL, dev->nodename,
 729                           "virtual-device", "%i", &vdevice);
 730        if (err != 1) {
 731                /* go looking in the extended area instead */
 732                err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
 733                                   "%i", &vdevice);
 734                if (err != 1) {
 735                        xenbus_dev_fatal(dev, err, "reading virtual-device");
 736                        return err;
 737                }
 738        }
 739
 740        info = kzalloc(sizeof(*info), GFP_KERNEL);
 741        if (!info) {
 742                xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
 743                return -ENOMEM;
 744        }
 745
 746        info->xbdev = dev;
 747        info->vdevice = vdevice;
 748        info->connected = BLKIF_STATE_DISCONNECTED;
 749        INIT_WORK(&info->work, blkif_restart_queue);
 750
 751        for (i = 0; i < BLK_RING_SIZE; i++)
 752                info->shadow[i].req.id = i+1;
 753        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 754
 755        /* Front end dir is a number, which is used as the id. */
 756        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
 757        dev_set_drvdata(&dev->dev, info);
 758
 759        err = talk_to_backend(dev, info);
 760        if (err) {
 761                kfree(info);
 762                dev_set_drvdata(&dev->dev, NULL);
 763                return err;
 764        }
 765
 766        return 0;
 767}
 768
 769
 770static int blkif_recover(struct blkfront_info *info)
 771{
 772        int i;
 773        struct blkif_request *req;
 774        struct blk_shadow *copy;
 775        int j;
 776
 777        /* Stage 1: Make a safe copy of the shadow state. */
 778        copy = kmalloc(sizeof(info->shadow),
 779                       GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
 780        if (!copy)
 781                return -ENOMEM;
 782        memcpy(copy, info->shadow, sizeof(info->shadow));
 783
 784        /* Stage 2: Set up free list. */
 785        memset(&info->shadow, 0, sizeof(info->shadow));
 786        for (i = 0; i < BLK_RING_SIZE; i++)
 787                info->shadow[i].req.id = i+1;
 788        info->shadow_free = info->ring.req_prod_pvt;
 789        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 790
 791        /* Stage 3: Find pending requests and requeue them. */
 792        for (i = 0; i < BLK_RING_SIZE; i++) {
 793                /* Not in use? */
 794                if (copy[i].request == 0)
 795                        continue;
 796
 797                /* Grab a request slot and copy shadow state into it. */
 798                req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 799                *req = copy[i].req;
 800
 801                /* We get a new request id, and must reset the shadow state. */
 802                req->id = get_id_from_freelist(info);
 803                memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
 804
 805                /* Rewrite any grant references invalidated by susp/resume. */
 806                for (j = 0; j < req->nr_segments; j++)
 807                        gnttab_grant_foreign_access_ref(
 808                                req->seg[j].gref,
 809                                info->xbdev->otherend_id,
 810                                pfn_to_mfn(info->shadow[req->id].frame[j]),
 811                                rq_data_dir(
 812                                        (struct request *)
 813                                        info->shadow[req->id].request));
 814                info->shadow[req->id].req = *req;
 815
 816                info->ring.req_prod_pvt++;
 817        }
 818
 819        kfree(copy);
 820
 821        xenbus_switch_state(info->xbdev, XenbusStateConnected);
 822
 823        spin_lock_irq(&blkif_io_lock);
 824
 825        /* Now safe for us to use the shared ring */
 826        info->connected = BLKIF_STATE_CONNECTED;
 827
 828        /* Send off requeued requests */
 829        flush_requests(info);
 830
 831        /* Kick any other new requests queued since we resumed */
 832        kick_pending_request_queues(info);
 833
 834        spin_unlock_irq(&blkif_io_lock);
 835
 836        return 0;
 837}
 838
 839/**
 840 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 841 * driver restart.  We tear down our blkif structure and recreate it, but
 842 * leave the device-layer structures intact so that this is transparent to the
 843 * rest of the kernel.
 844 */
 845static int blkfront_resume(struct xenbus_device *dev)
 846{
 847        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 848        int err;
 849
 850        dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
 851
 852        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 853
 854        err = talk_to_backend(dev, info);
 855        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
 856                err = blkif_recover(info);
 857
 858        return err;
 859}
 860
 861
 862/*
 863 * Invoked when the backend is finally 'ready' (and has told produced
 864 * the details about the physical device - #sectors, size, etc).
 865 */
 866static void blkfront_connect(struct blkfront_info *info)
 867{
 868        unsigned long long sectors;
 869        unsigned long sector_size;
 870        unsigned int binfo;
 871        int err;
 872
 873        if ((info->connected == BLKIF_STATE_CONNECTED) ||
 874            (info->connected == BLKIF_STATE_SUSPENDED) )
 875                return;
 876
 877        dev_dbg(&info->xbdev->dev, "%s:%s.\n",
 878                __func__, info->xbdev->otherend);
 879
 880        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 881                            "sectors", "%llu", &sectors,
 882                            "info", "%u", &binfo,
 883                            "sector-size", "%lu", &sector_size,
 884                            NULL);
 885        if (err) {
 886                xenbus_dev_fatal(info->xbdev, err,
 887                                 "reading backend fields at %s",
 888                                 info->xbdev->otherend);
 889                return;
 890        }
 891
 892        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 893                            "feature-barrier", "%lu", &info->feature_barrier,
 894                            NULL);
 895        if (err)
 896                info->feature_barrier = 0;
 897
 898        err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 899        if (err) {
 900                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 901                                 info->xbdev->otherend);
 902                return;
 903        }
 904
 905        xenbus_switch_state(info->xbdev, XenbusStateConnected);
 906
 907        /* Kick pending requests. */
 908        spin_lock_irq(&blkif_io_lock);
 909        info->connected = BLKIF_STATE_CONNECTED;
 910        kick_pending_request_queues(info);
 911        spin_unlock_irq(&blkif_io_lock);
 912
 913        add_disk(info->gd);
 914
 915        info->is_ready = 1;
 916}
 917
 918/**
 919 * Handle the change of state of the backend to Closing.  We must delete our
 920 * device-layer structures now, to ensure that writes are flushed through to
 921 * the backend.  Once is this done, we can switch to Closed in
 922 * acknowledgement.
 923 */
 924static void blkfront_closing(struct xenbus_device *dev)
 925{
 926        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 927        unsigned long flags;
 928
 929        dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
 930
 931        if (info->rq == NULL)
 932                goto out;
 933
 934        spin_lock_irqsave(&blkif_io_lock, flags);
 935
 936        /* No more blkif_request(). */
 937        blk_stop_queue(info->rq);
 938
 939        /* No more gnttab callback work. */
 940        gnttab_cancel_free_callback(&info->callback);
 941        spin_unlock_irqrestore(&blkif_io_lock, flags);
 942
 943        /* Flush gnttab callback work. Must be done with no locks held. */
 944        flush_scheduled_work();
 945
 946        blk_cleanup_queue(info->rq);
 947        info->rq = NULL;
 948
 949        del_gendisk(info->gd);
 950
 951 out:
 952        xenbus_frontend_closed(dev);
 953}
 954
 955/**
 956 * Callback received when the backend's state changes.
 957 */
 958static void backend_changed(struct xenbus_device *dev,
 959                            enum xenbus_state backend_state)
 960{
 961        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 962        struct block_device *bd;
 963
 964        dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
 965
 966        switch (backend_state) {
 967        case XenbusStateInitialising:
 968        case XenbusStateInitWait:
 969        case XenbusStateInitialised:
 970        case XenbusStateUnknown:
 971        case XenbusStateClosed:
 972                break;
 973
 974        case XenbusStateConnected:
 975                blkfront_connect(info);
 976                break;
 977
 978        case XenbusStateClosing:
 979                if (info->gd == NULL) {
 980                        xenbus_frontend_closed(dev);
 981                        break;
 982                }
 983                bd = bdget_disk(info->gd, 0);
 984                if (bd == NULL)
 985                        xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
 986
 987                mutex_lock(&bd->bd_mutex);
 988                if (info->users > 0)
 989                        xenbus_dev_error(dev, -EBUSY,
 990                                         "Device in use; refusing to close");
 991                else
 992                        blkfront_closing(dev);
 993                mutex_unlock(&bd->bd_mutex);
 994                bdput(bd);
 995                break;
 996        }
 997}
 998
 999static int blkfront_remove(struct xenbus_device *dev)
1000{
1001        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1002
1003        dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
1004
1005        blkif_free(info, 0);
1006
1007        kfree(info);
1008
1009        return 0;
1010}
1011
1012static int blkfront_is_ready(struct xenbus_device *dev)
1013{
1014        struct blkfront_info *info = dev_get_drvdata(&dev->dev);
1015
1016        return info->is_ready;
1017}
1018
1019static int blkif_open(struct block_device *bdev, fmode_t mode)
1020{
1021        struct blkfront_info *info = bdev->bd_disk->private_data;
1022        info->users++;
1023        return 0;
1024}
1025
1026static int blkif_release(struct gendisk *disk, fmode_t mode)
1027{
1028        struct blkfront_info *info = disk->private_data;
1029        info->users--;
1030        if (info->users == 0) {
1031                /* Check whether we have been instructed to close.  We will
1032                   have ignored this request initially, as the device was
1033                   still mounted. */
1034                struct xenbus_device *dev = info->xbdev;
1035                enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
1036
1037                if (state == XenbusStateClosing && info->is_ready)
1038                        blkfront_closing(dev);
1039        }
1040        return 0;
1041}
1042
1043static const struct block_device_operations xlvbd_block_fops =
1044{
1045        .owner = THIS_MODULE,
1046        .open = blkif_open,
1047        .release = blkif_release,
1048        .getgeo = blkif_getgeo,
1049        .locked_ioctl = blkif_ioctl,
1050};
1051
1052
1053static struct xenbus_device_id blkfront_ids[] = {
1054        { "vbd" },
1055        { "" }
1056};
1057
1058static struct xenbus_driver blkfront = {
1059        .name = "vbd",
1060        .owner = THIS_MODULE,
1061        .ids = blkfront_ids,
1062        .probe = blkfront_probe,
1063        .remove = blkfront_remove,
1064        .resume = blkfront_resume,
1065        .otherend_changed = backend_changed,
1066        .is_ready = blkfront_is_ready,
1067};
1068
1069static int __init xlblk_init(void)
1070{
1071        if (!xen_domain())
1072                return -ENODEV;
1073
1074        if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
1075                printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
1076                       XENVBD_MAJOR, DEV_NAME);
1077                return -ENODEV;
1078        }
1079
1080        return xenbus_register_frontend(&blkfront);
1081}
1082module_init(xlblk_init);
1083
1084
1085static void __exit xlblk_exit(void)
1086{
1087        return xenbus_unregister_driver(&blkfront);
1088}
1089module_exit(xlblk_exit);
1090
1091MODULE_DESCRIPTION("Xen virtual block device frontend");
1092MODULE_LICENSE("GPL");
1093MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
1094MODULE_ALIAS("xen:vbd");
1095MODULE_ALIAS("xenblk");
1096
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.