linux/drivers/virtio/virtio_ring.c
<<
>>
Prefs
   1/* Virtio ring implementation.
   2 *
   3 *  Copyright 2007 Rusty Russell IBM Corporation
   4 *
   5 *  This program is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU General Public License as published by
   7 *  the Free Software Foundation; either version 2 of the License, or
   8 *  (at your option) any later version.
   9 *
  10 *  This program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License
  16 *  along with this program; if not, write to the Free Software
  17 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  18 */
  19#include <linux/virtio.h>
  20#include <linux/virtio_ring.h>
  21#include <linux/virtio_config.h>
  22#include <linux/device.h>
  23#include <linux/slab.h>
  24#include <linux/module.h>
  25
  26/* virtio guest is communicating with a virtual "device" that actually runs on
  27 * a host processor.  Memory barriers are used to control SMP effects. */
  28#ifdef CONFIG_SMP
  29/* Where possible, use SMP barriers which are more lightweight than mandatory
  30 * barriers, because mandatory barriers control MMIO effects on accesses
  31 * through relaxed memory I/O windows (which virtio does not use). */
  32#define virtio_mb() smp_mb()
  33#define virtio_rmb() smp_rmb()
  34#define virtio_wmb() smp_wmb()
  35#else
  36/* We must force memory ordering even if guest is UP since host could be
  37 * running on another CPU, but SMP barriers are defined to barrier() in that
  38 * configuration. So fall back to mandatory barriers instead. */
  39#define virtio_mb() mb()
  40#define virtio_rmb() rmb()
  41#define virtio_wmb() wmb()
  42#endif
  43
  44#ifdef DEBUG
  45/* For development, we want to crash whenever the ring is screwed. */
  46#define BAD_RING(_vq, fmt, args...)                             \
  47        do {                                                    \
  48                dev_err(&(_vq)->vq.vdev->dev,                   \
  49                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  50                BUG();                                          \
  51        } while (0)
  52/* Caller is supposed to guarantee no reentry. */
  53#define START_USE(_vq)                                          \
  54        do {                                                    \
  55                if ((_vq)->in_use)                              \
  56                        panic("%s:in_use = %i\n",               \
  57                              (_vq)->vq.name, (_vq)->in_use);   \
  58                (_vq)->in_use = __LINE__;                       \
  59        } while (0)
  60#define END_USE(_vq) \
  61        do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
  62#else
  63#define BAD_RING(_vq, fmt, args...)                             \
  64        do {                                                    \
  65                dev_err(&_vq->vq.vdev->dev,                     \
  66                        "%s:"fmt, (_vq)->vq.name, ##args);      \
  67                (_vq)->broken = true;                           \
  68        } while (0)
  69#define START_USE(vq)
  70#define END_USE(vq)
  71#endif
  72
  73struct vring_virtqueue
  74{
  75        struct virtqueue vq;
  76
  77        /* Actual memory layout for this queue */
  78        struct vring vring;
  79
  80        /* Other side has made a mess, don't try any more. */
  81        bool broken;
  82
  83        /* Host supports indirect buffers */
  84        bool indirect;
  85
  86        /* Host publishes avail event idx */
  87        bool event;
  88
  89        /* Number of free buffers */
  90        unsigned int num_free;
  91        /* Head of free buffer list. */
  92        unsigned int free_head;
  93        /* Number we've added since last sync. */
  94        unsigned int num_added;
  95
  96        /* Last used index we've seen. */
  97        u16 last_used_idx;
  98
  99        /* How to notify other side. FIXME: commonalize hcalls! */
 100        void (*notify)(struct virtqueue *vq);
 101
 102#ifdef DEBUG
 103        /* They're supposed to lock for us. */
 104        unsigned int in_use;
 105#endif
 106
 107        /* Tokens for callbacks. */
 108        void *data[];
 109};
 110
 111#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 112
 113/* Set up an indirect table of descriptors and add it to the queue. */
 114static int vring_add_indirect(struct vring_virtqueue *vq,
 115                              struct scatterlist sg[],
 116                              unsigned int out,
 117                              unsigned int in,
 118                              gfp_t gfp)
 119{
 120        struct vring_desc *desc;
 121        unsigned head;
 122        int i;
 123
 124        desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
 125        if (!desc)
 126                return -ENOMEM;
 127
 128        /* Transfer entries from the sg list into the indirect page */
 129        for (i = 0; i < out; i++) {
 130                desc[i].flags = VRING_DESC_F_NEXT;
 131                desc[i].addr = sg_phys(sg);
 132                desc[i].len = sg->length;
 133                desc[i].next = i+1;
 134                sg++;
 135        }
 136        for (; i < (out + in); i++) {
 137                desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
 138                desc[i].addr = sg_phys(sg);
 139                desc[i].len = sg->length;
 140                desc[i].next = i+1;
 141                sg++;
 142        }
 143
 144        /* Last one doesn't continue. */
 145        desc[i-1].flags &= ~VRING_DESC_F_NEXT;
 146        desc[i-1].next = 0;
 147
 148        /* We're about to use a buffer */
 149        vq->num_free--;
 150
 151        /* Use a single buffer which doesn't continue */
 152        head = vq->free_head;
 153        vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
 154        vq->vring.desc[head].addr = virt_to_phys(desc);
 155        vq->vring.desc[head].len = i * sizeof(struct vring_desc);
 156
 157        /* Update free pointer */
 158        vq->free_head = vq->vring.desc[head].next;
 159
 160        return head;
 161}
 162
 163int virtqueue_add_buf_gfp(struct virtqueue *_vq,
 164                          struct scatterlist sg[],
 165                          unsigned int out,
 166                          unsigned int in,
 167                          void *data,
 168                          gfp_t gfp)
 169{
 170        struct vring_virtqueue *vq = to_vvq(_vq);
 171        unsigned int i, avail, uninitialized_var(prev);
 172        int head;
 173
 174        START_USE(vq);
 175
 176        BUG_ON(data == NULL);
 177
 178        /* If the host supports indirect descriptor tables, and we have multiple
 179         * buffers, then go indirect. FIXME: tune this threshold */
 180        if (vq->indirect && (out + in) > 1 && vq->num_free) {
 181                head = vring_add_indirect(vq, sg, out, in, gfp);
 182                if (likely(head >= 0))
 183                        goto add_head;
 184        }
 185
 186        BUG_ON(out + in > vq->vring.num);
 187        BUG_ON(out + in == 0);
 188
 189        if (vq->num_free < out + in) {
 190                pr_debug("Can't add buf len %i - avail = %i\n",
 191                         out + in, vq->num_free);
 192                /* FIXME: for historical reasons, we force a notify here if
 193                 * there are outgoing parts to the buffer.  Presumably the
 194                 * host should service the ring ASAP. */
 195                if (out)
 196                        vq->notify(&vq->vq);
 197                END_USE(vq);
 198                return -ENOSPC;
 199        }
 200
 201        /* We're about to use some buffers from the free list. */
 202        vq->num_free -= out + in;
 203
 204        head = vq->free_head;
 205        for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
 206                vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
 207                vq->vring.desc[i].addr = sg_phys(sg);
 208                vq->vring.desc[i].len = sg->length;
 209                prev = i;
 210                sg++;
 211        }
 212        for (; in; i = vq->vring.desc[i].next, in--) {
 213                vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
 214                vq->vring.desc[i].addr = sg_phys(sg);
 215                vq->vring.desc[i].len = sg->length;
 216                prev = i;
 217                sg++;
 218        }
 219        /* Last one doesn't continue. */
 220        vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
 221
 222        /* Update free pointer */
 223        vq->free_head = i;
 224
 225add_head:
 226        /* Set token. */
 227        vq->data[head] = data;
 228
 229        /* Put entry in available array (but don't update avail->idx until they
 230         * do sync).  FIXME: avoid modulus here? */
 231        avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;
 232        vq->vring.avail->ring[avail] = head;
 233
 234        pr_debug("Added buffer head %i to %p\n", head, vq);
 235        END_USE(vq);
 236
 237        return vq->num_free;
 238}
 239EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
 240
 241void virtqueue_kick(struct virtqueue *_vq)
 242{
 243        struct vring_virtqueue *vq = to_vvq(_vq);
 244        u16 new, old;
 245        START_USE(vq);
 246        /* Descriptors and available array need to be set before we expose the
 247         * new available array entries. */
 248        virtio_wmb();
 249
 250        old = vq->vring.avail->idx;
 251        new = vq->vring.avail->idx = old + vq->num_added;
 252        vq->num_added = 0;
 253
 254        /* Need to update avail index before checking if we should notify */
 255        virtio_mb();
 256
 257        if (vq->event ?
 258            vring_need_event(vring_avail_event(&vq->vring), new, old) :
 259            !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
 260                /* Prod other side to tell it about changes. */
 261                vq->notify(&vq->vq);
 262
 263        END_USE(vq);
 264}
 265EXPORT_SYMBOL_GPL(virtqueue_kick);
 266
 267static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 268{
 269        unsigned int i;
 270
 271        /* Clear data ptr. */
 272        vq->data[head] = NULL;
 273
 274        /* Put back on free list: find end */
 275        i = head;
 276
 277        /* Free the indirect table */
 278        if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
 279                kfree(phys_to_virt(vq->vring.desc[i].addr));
 280
 281        while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
 282                i = vq->vring.desc[i].next;
 283                vq->num_free++;
 284        }
 285
 286        vq->vring.desc[i].next = vq->free_head;
 287        vq->free_head = head;
 288        /* Plus final descriptor */
 289        vq->num_free++;
 290}
 291
 292static inline bool more_used(const struct vring_virtqueue *vq)
 293{
 294        return vq->last_used_idx != vq->vring.used->idx;
 295}
 296
 297void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 298{
 299        struct vring_virtqueue *vq = to_vvq(_vq);
 300        void *ret;
 301        unsigned int i;
 302
 303        START_USE(vq);
 304
 305        if (unlikely(vq->broken)) {
 306                END_USE(vq);
 307                return NULL;
 308        }
 309
 310        if (!more_used(vq)) {
 311                pr_debug("No more buffers in queue\n");
 312                END_USE(vq);
 313                return NULL;
 314        }
 315
 316        /* Only get used array entries after they have been exposed by host. */
 317        virtio_rmb();
 318
 319        i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
 320        *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
 321
 322        if (unlikely(i >= vq->vring.num)) {
 323                BAD_RING(vq, "id %u out of range\n", i);
 324                return NULL;
 325        }
 326        if (unlikely(!vq->data[i])) {
 327                BAD_RING(vq, "id %u is not a head!\n", i);
 328                return NULL;
 329        }
 330
 331        /* detach_buf clears data, so grab it now. */
 332        ret = vq->data[i];
 333        detach_buf(vq, i);
 334        vq->last_used_idx++;
 335        /* If we expect an interrupt for the next entry, tell host
 336         * by writing event index and flush out the write before
 337         * the read in the next get_buf call. */
 338        if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 339                vring_used_event(&vq->vring) = vq->last_used_idx;
 340                virtio_mb();
 341        }
 342
 343        END_USE(vq);
 344        return ret;
 345}
 346EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 347
 348void virtqueue_disable_cb(struct virtqueue *_vq)
 349{
 350        struct vring_virtqueue *vq = to_vvq(_vq);
 351
 352        vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 353}
 354EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 355
 356bool virtqueue_enable_cb(struct virtqueue *_vq)
 357{
 358        struct vring_virtqueue *vq = to_vvq(_vq);
 359
 360        START_USE(vq);
 361
 362        /* We optimistically turn back on interrupts, then check if there was
 363         * more to do. */
 364        /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 365         * either clear the flags bit or point the event index at the next
 366         * entry. Always do both to keep code simple. */
 367        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 368        vring_used_event(&vq->vring) = vq->last_used_idx;
 369        virtio_mb();
 370        if (unlikely(more_used(vq))) {
 371                END_USE(vq);
 372                return false;
 373        }
 374
 375        END_USE(vq);
 376        return true;
 377}
 378EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 379
 380bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 381{
 382        struct vring_virtqueue *vq = to_vvq(_vq);
 383        u16 bufs;
 384
 385        START_USE(vq);
 386
 387        /* We optimistically turn back on interrupts, then check if there was
 388         * more to do. */
 389        /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
 390         * either clear the flags bit or point the event index at the next
 391         * entry. Always do both to keep code simple. */
 392        vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 393        /* TODO: tune this threshold */
 394        bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 395        vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
 396        virtio_mb();
 397        if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 398                END_USE(vq);
 399                return false;
 400        }
 401
 402        END_USE(vq);
 403        return true;
 404}
 405EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 406
 407void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 408{
 409        struct vring_virtqueue *vq = to_vvq(_vq);
 410        unsigned int i;
 411        void *buf;
 412
 413        START_USE(vq);
 414
 415        for (i = 0; i < vq->vring.num; i++) {
 416                if (!vq->data[i])
 417                        continue;
 418                /* detach_buf clears data, so grab it now. */
 419                buf = vq->data[i];
 420                detach_buf(vq, i);
 421                vq->vring.avail->idx--;
 422                END_USE(vq);
 423                return buf;
 424        }
 425        /* That should have freed everything. */
 426        BUG_ON(vq->num_free != vq->vring.num);
 427
 428        END_USE(vq);
 429        return NULL;
 430}
 431EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
 432
 433irqreturn_t vring_interrupt(int irq, void *_vq)
 434{
 435        struct vring_virtqueue *vq = to_vvq(_vq);
 436
 437        if (!more_used(vq)) {
 438                pr_debug("virtqueue interrupt with no work for %p\n", vq);
 439                return IRQ_NONE;
 440        }
 441
 442        if (unlikely(vq->broken))
 443                return IRQ_HANDLED;
 444
 445        pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
 446        if (vq->vq.callback)
 447                vq->vq.callback(&vq->vq);
 448
 449        return IRQ_HANDLED;
 450}
 451EXPORT_SYMBOL_GPL(vring_interrupt);
 452
 453struct virtqueue *vring_new_virtqueue(unsigned int num,
 454                                      unsigned int vring_align,
 455                                      struct virtio_device *vdev,
 456                                      void *pages,
 457                                      void (*notify)(struct virtqueue *),
 458                                      void (*callback)(struct virtqueue *),
 459                                      const char *name)
 460{
 461        struct vring_virtqueue *vq;
 462        unsigned int i;
 463
 464        /* We assume num is a power of 2. */
 465        if (num & (num - 1)) {
 466                dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
 467                return NULL;
 468        }
 469
 470        vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
 471        if (!vq)
 472                return NULL;
 473
 474        vring_init(&vq->vring, num, pages, vring_align);
 475        vq->vq.callback = callback;
 476        vq->vq.vdev = vdev;
 477        vq->vq.name = name;
 478        vq->notify = notify;
 479        vq->broken = false;
 480        vq->last_used_idx = 0;
 481        vq->num_added = 0;
 482        list_add_tail(&vq->vq.list, &vdev->vqs);
 483#ifdef DEBUG
 484        vq->in_use = false;
 485#endif
 486
 487        vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
 488        vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 489
 490        /* No callback?  Tell other side not to bother us. */
 491        if (!callback)
 492                vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 493
 494        /* Put everything in free lists. */
 495        vq->num_free = num;
 496        vq->free_head = 0;
 497        for (i = 0; i < num-1; i++) {
 498                vq->vring.desc[i].next = i+1;
 499                vq->data[i] = NULL;
 500        }
 501        vq->data[i] = NULL;
 502
 503        return &vq->vq;
 504}
 505EXPORT_SYMBOL_GPL(vring_new_virtqueue);
 506
 507void vring_del_virtqueue(struct virtqueue *vq)
 508{
 509        list_del(&vq->list);
 510        kfree(to_vvq(vq));
 511}
 512EXPORT_SYMBOL_GPL(vring_del_virtqueue);
 513
 514/* Manipulates transport-specific feature bits. */
 515void vring_transport_features(struct virtio_device *vdev)
 516{
 517        unsigned int i;
 518
 519        for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
 520                switch (i) {
 521                case VIRTIO_RING_F_INDIRECT_DESC:
 522                        break;
 523                case VIRTIO_RING_F_EVENT_IDX:
 524                        break;
 525                default:
 526                        /* We don't understand this bit. */
 527                        clear_bit(i, vdev->features);
 528                }
 529        }
 530}
 531EXPORT_SYMBOL_GPL(vring_transport_features);
 532
 533/* return the size of the vring within the virtqueue */
 534unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 535{
 536
 537        struct vring_virtqueue *vq = to_vvq(_vq);
 538
 539        return vq->vring.num;
 540}
 541EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
 542
 543MODULE_LICENSE("GPL");
 544
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.