linux/virt/kvm/assigned-dev.c
/*
 * Kernel-based Virtual Machine - device assignment support
 *
 * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/fs.h>
#include "irq.h"

static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
                                                      int assigned_dev_id)
{
        struct list_head *ptr;
        struct kvm_assigned_dev_kernel *match;

        list_for_each(ptr, head) {
                match = list_entry(ptr, struct kvm_assigned_dev_kernel, list);
                if (match->assigned_dev_id == assigned_dev_id)
                        return match;
        }
        return NULL;
}

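/*
 * Translate a host IRQ number back to its index in the device's MSI-X
 * entry array, so the threaded handler can find the matching guest
 * vector. Returns -1 if the IRQ does not belong to this device.
 */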
static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
                                    *assigned_dev, int irq)
{
        int i, index;
        struct msix_entry *host_msix_entries;

        host_msix_entries = assigned_dev->host_msix_entries;

        index = -1;
        for (i = 0; i < assigned_dev->entries_nr; i++)
                if (irq == host_msix_entries[i].vector) {
                        index = i;
                        break;
                }
        if (index < 0)
                printk(KERN_WARNING "Failed to find correlated MSI-X entry!\n");

        return index;
}

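/*
 * Hard IRQ handler for INTx on devices that support PCI 2.3 style INTx
 * masking. If the device actually raised the line, mask it at device
 * level and hand off to the threaded handler; otherwise the interrupt
 * came from another device sharing the line.
 */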
static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int ret;

        spin_lock(&assigned_dev->intx_lock);
        if (pci_check_and_mask_intx(assigned_dev->dev)) {
                assigned_dev->host_irq_disabled = true;
                ret = IRQ_WAKE_THREAD;
        } else
                ret = IRQ_NONE;
        spin_unlock(&assigned_dev->intx_lock);

        return ret;
}

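/*
 * Inject an interrupt into the guest. For INTx, respect a userspace
 * mask (KVM_DEV_ASSIGN_MASK_INTX) under intx_mask_lock; MSI/MSI-X
 * vectors are injected unconditionally.
 */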
static void
kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
                                 int vector)
{
        if (unlikely(assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_GUEST_INTX)) {
                spin_lock(&assigned_dev->intx_mask_lock);
                if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
                        kvm_set_irq(assigned_dev->kvm,
                                    assigned_dev->irq_source_id, vector, 1);
                spin_unlock(&assigned_dev->intx_mask_lock);
        } else
                kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                            vector, 1);
}

static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                spin_lock_irq(&assigned_dev->intx_lock);
                disable_irq_nosync(irq);
                assigned_dev->host_irq_disabled = true;
                spin_unlock_irq(&assigned_dev->intx_lock);
        }

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}

#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;

        kvm_assigned_dev_raise_guest_irq(assigned_dev,
                                         assigned_dev->guest_irq);

        return IRQ_HANDLED;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
{
        struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
        int index = find_index_from_host_irq(assigned_dev, irq);
        u32 vector;

        if (index >= 0) {
                vector = assigned_dev->guest_msix_entries[index].vector;
                kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
        }

        return IRQ_HANDLED;
}
#endif

/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
        struct kvm_assigned_dev_kernel *dev =
                container_of(kian, struct kvm_assigned_dev_kernel,
                             ack_notifier);

        kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);

        spin_lock(&dev->intx_mask_lock);

        if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
                bool reassert = false;

                spin_lock_irq(&dev->intx_lock);
                /*
                 * The guest IRQ may be shared so this ack can come from an
                 * IRQ for another guest device.
                 */
                if (dev->host_irq_disabled) {
                        if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
                                enable_irq(dev->host_irq);
                        else if (!pci_check_and_unmask_intx(dev->dev))
                                reassert = true;
                        dev->host_irq_disabled = reassert;
                }
                spin_unlock_irq(&dev->intx_lock);

                if (reassert)
                        kvm_set_irq(dev->kvm, dev->irq_source_id,
                                    dev->guest_irq, 1);
        }

        spin_unlock(&dev->intx_mask_lock);
}

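/*
 * Tear down the guest side of an IRQ assignment: drop the ack notifier,
 * deassert the guest line and release the IRQ source ID.
 */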
static void deassign_guest_irq(struct kvm *kvm,
                               struct kvm_assigned_dev_kernel *assigned_dev)
{
        if (assigned_dev->ack_notifier.gsi != -1)
                kvm_unregister_irq_ack_notifier(kvm,
                                                &assigned_dev->ack_notifier);

        kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
                    assigned_dev->guest_irq, 0);

        if (assigned_dev->irq_source_id != -1)
                kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
        assigned_dev->irq_source_id = -1;
        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
}

/* This function implicitly holds the kvm->lock mutex, due to cancel_work_sync() */
static void deassign_host_irq(struct kvm *kvm,
                              struct kvm_assigned_dev_kernel *assigned_dev)
{
        /*
         * We disable the IRQ here to prevent further events.
         *
         * Note that this may result in a nested disable if the interrupt
         * type is INTx, but that is OK since we are going to free it anyway.
         *
         * If this function is part of VM destruction, make sure that the
         * kvm state is still valid at this point, since we may have to
         * wait on a currently running IRQ handler.
         */
        if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
                int i;
                for (i = 0; i < assigned_dev->entries_nr; i++)
                        disable_irq(assigned_dev->host_msix_entries[i].vector);

                for (i = 0; i < assigned_dev->entries_nr; i++)
                        free_irq(assigned_dev->host_msix_entries[i].vector,
                                 assigned_dev);

                assigned_dev->entries_nr = 0;
                kfree(assigned_dev->host_msix_entries);
                kfree(assigned_dev->guest_msix_entries);
                pci_disable_msix(assigned_dev->dev);
        } else {
                /* Deal with MSI and INTx */
                if ((assigned_dev->irq_requested_type &
                     KVM_DEV_IRQ_HOST_INTX) &&
                    (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        spin_lock_irq(&assigned_dev->intx_lock);
                        pci_intx(assigned_dev->dev, false);
                        spin_unlock_irq(&assigned_dev->intx_lock);
                        synchronize_irq(assigned_dev->host_irq);
                } else
                        disable_irq(assigned_dev->host_irq);

                free_irq(assigned_dev->host_irq, assigned_dev);

                if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
                        pci_disable_msi(assigned_dev->dev);
        }

        assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
}

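/*
 * Deassign the host and/or guest halves of an IRQ assignment, as
 * selected by irq_requested_type.
 */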
static int kvm_deassign_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *assigned_dev,
                            unsigned long irq_requested_type)
{
        unsigned long guest_irq_type, host_irq_type;

        if (!irqchip_in_kernel(kvm))
                return -EINVAL;
        /* no irq assignment to deassign */
        if (!assigned_dev->irq_requested_type)
                return -ENXIO;

        host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
        guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;

        if (host_irq_type)
                deassign_host_irq(kvm, assigned_dev);
        if (guest_irq_type)
                deassign_guest_irq(kvm, assigned_dev);

        return 0;
}

static void kvm_free_assigned_irq(struct kvm *kvm,
                                  struct kvm_assigned_dev_kernel *assigned_dev)
{
        kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}

static void kvm_free_assigned_device(struct kvm *kvm,
                                     struct kvm_assigned_dev_kernel
                                     *assigned_dev)
{
        kvm_free_assigned_irq(kvm, assigned_dev);

        pci_reset_function(assigned_dev->dev);
        if (pci_load_and_free_saved_state(assigned_dev->dev,
                                          &assigned_dev->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&assigned_dev->dev->dev));
        else
                pci_restore_state(assigned_dev->dev);

        assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;

        pci_release_regions(assigned_dev->dev);
        pci_disable_device(assigned_dev->dev);
        pci_dev_put(assigned_dev->dev);

        list_del(&assigned_dev->list);
        kfree(assigned_dev);
}

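/* Called on VM teardown to release every device still assigned to it. */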
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
        struct list_head *ptr, *ptr2;
        struct kvm_assigned_dev_kernel *assigned_dev;

        list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
                assigned_dev = list_entry(ptr,
                                          struct kvm_assigned_dev_kernel,
                                          list);

                kvm_free_assigned_device(kvm, assigned_dev);
        }
}

static int assigned_device_enable_host_intx(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        irq_handler_t irq_handler;
        unsigned long flags;

        dev->host_irq = dev->dev->irq;

        /*
         * We can only share the IRQ line with other host devices if we are
         * able to disable the IRQ source at device-level - independently of
         * the guest driver. Otherwise host devices may suffer from unbounded
         * IRQ latencies when the guest keeps the line asserted.
         */
        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                irq_handler = kvm_assigned_dev_intx;
                flags = IRQF_SHARED;
        } else {
                irq_handler = NULL;
                flags = IRQF_ONESHOT;
        }
        if (request_threaded_irq(dev->host_irq, irq_handler,
                                 kvm_assigned_dev_thread_intx, flags,
                                 dev->irq_name, dev))
                return -EIO;

        if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
                spin_lock_irq(&dev->intx_lock);
                pci_intx(dev->dev, true);
                spin_unlock_irq(&dev->intx_lock);
        }
        return 0;
}

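/*
 * MSI and MSI-X are edge-triggered, so the hard handlers below have no
 * device-level masking to do; they just wake the threaded handler that
 * injects the interrupt into the guest.
 */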
#ifdef __KVM_HAVE_MSI
static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
{
        return IRQ_WAKE_THREAD;
}

static int assigned_device_enable_host_msi(struct kvm *kvm,
                                           struct kvm_assigned_dev_kernel *dev)
{
        int r;

        if (!dev->dev->msi_enabled) {
                r = pci_enable_msi(dev->dev);
                if (r)
                        return r;
        }

        dev->host_irq = dev->dev->irq;
        if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
                                 kvm_assigned_dev_thread_msi, 0,
                                 dev->irq_name, dev)) {
                pci_disable_msi(dev->dev);
                return -EIO;
        }

        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
{
        return IRQ_WAKE_THREAD;
}

static int assigned_device_enable_host_msix(struct kvm *kvm,
                                            struct kvm_assigned_dev_kernel *dev)
{
        int i, r = -EINVAL;

        /* host_msix_entries and guest_msix_entries should have been
         * initialized */
        if (dev->entries_nr == 0)
                return r;

        r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
        if (r)
                return r;

        for (i = 0; i < dev->entries_nr; i++) {
                r = request_threaded_irq(dev->host_msix_entries[i].vector,
                                         kvm_assigned_dev_msix,
                                         kvm_assigned_dev_thread_msix,
                                         0, dev->irq_name, dev);
                if (r)
                        goto err;
        }

        return 0;
err:
        for (i -= 1; i >= 0; i--)
                free_irq(dev->host_msix_entries[i].vector, dev);
        pci_disable_msix(dev->dev);
        return r;
}

#endif

static int assigned_device_enable_guest_intx(struct kvm *kvm,
                                struct kvm_assigned_dev_kernel *dev,
                                struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = irq->guest_irq;
        return 0;
}

#ifdef __KVM_HAVE_MSI
static int assigned_device_enable_guest_msi(struct kvm *kvm,
                        struct kvm_assigned_dev_kernel *dev,
                        struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

#ifdef __KVM_HAVE_MSIX
static int assigned_device_enable_guest_msix(struct kvm *kvm,
                        struct kvm_assigned_dev_kernel *dev,
                        struct kvm_assigned_irq *irq)
{
        dev->guest_irq = irq->guest_irq;
        dev->ack_notifier.gsi = -1;
        return 0;
}
#endif

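/*
 * Request the host-side interrupt (INTx, MSI or MSI-X) for an assigned
 * device. Fails with -EEXIST if a host IRQ type is already set up.
 */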
static int assign_host_irq(struct kvm *kvm,
                           struct kvm_assigned_dev_kernel *dev,
                           __u32 host_irq_type)
{
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
                return r;

        snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
                 pci_name(dev->dev));

        switch (host_irq_type) {
        case KVM_DEV_IRQ_HOST_INTX:
                r = assigned_device_enable_host_intx(kvm, dev);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_HOST_MSI:
                r = assigned_device_enable_host_msi(kvm, dev);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_HOST_MSIX:
                r = assigned_device_enable_host_msix(kvm, dev);
                break;
#endif
        default:
                r = -EINVAL;
        }
        dev->host_irq_disabled = false;

        if (!r)
                dev->irq_requested_type |= host_irq_type;

        return r;
}

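/*
 * Set up guest-side delivery: allocate an IRQ source ID, record the
 * guest GSI and, for INTx, register the ack notifier.
 */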
static int assign_guest_irq(struct kvm *kvm,
                            struct kvm_assigned_dev_kernel *dev,
                            struct kvm_assigned_irq *irq,
                            unsigned long guest_irq_type)
{
        int id;
        int r = -EEXIST;

        if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
                return r;

        id = kvm_request_irq_source_id(kvm);
        if (id < 0)
                return id;

        dev->irq_source_id = id;

        switch (guest_irq_type) {
        case KVM_DEV_IRQ_GUEST_INTX:
                r = assigned_device_enable_guest_intx(kvm, dev, irq);
                break;
#ifdef __KVM_HAVE_MSI
        case KVM_DEV_IRQ_GUEST_MSI:
                r = assigned_device_enable_guest_msi(kvm, dev, irq);
                break;
#endif
#ifdef __KVM_HAVE_MSIX
        case KVM_DEV_IRQ_GUEST_MSIX:
                r = assigned_device_enable_guest_msix(kvm, dev, irq);
                break;
#endif
        default:
                r = -EINVAL;
        }

        if (!r) {
                dev->irq_requested_type |= guest_irq_type;
                if (dev->ack_notifier.gsi != -1)
                        kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
        } else
                kvm_free_irq_source_id(kvm, dev->irq_source_id);

        return r;
}

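/*
 * KVM_ASSIGN_DEV_IRQ handler: wires up the host and/or guest half of an
 * assigned device's interrupt. At most one host type and one guest type
 * may be requested per call.
 */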
/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
                                   struct kvm_assigned_irq *assigned_irq)
{
        int r = -EINVAL;
        struct kvm_assigned_dev_kernel *match;
        unsigned long host_irq_type, guest_irq_type;

        if (!irqchip_in_kernel(kvm))
                return r;

        mutex_lock(&kvm->lock);
        r = -ENODEV;
        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
        guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);

        r = -EINVAL;
        /* can only assign one type at a time */
        if (hweight_long(host_irq_type) > 1)
                goto out;
        if (hweight_long(guest_irq_type) > 1)
                goto out;
        if (host_irq_type == 0 && guest_irq_type == 0)
                goto out;

        r = 0;
        if (host_irq_type)
                r = assign_host_irq(kvm, match, host_irq_type);
        if (r)
                goto out;

        if (guest_irq_type)
                r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
                                         struct kvm_assigned_irq
                                         *assigned_irq)
{
        int r = -ENODEV;
        struct kvm_assigned_dev_kernel *match;
        unsigned long irq_type;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_irq->assigned_dev_id);
        if (!match)
                goto out;

        irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
                                          KVM_DEV_IRQ_GUEST_MASK);
        r = kvm_deassign_irq(kvm, match, irq_type);
out:
        mutex_unlock(&kvm->lock);
        return r;
}

/*
 * We want to test whether the caller has been granted permissions to
 * use this device.  To be able to configure and control the device,
 * the user needs access to PCI configuration space and BAR resources.
 * These are accessed through PCI sysfs.  PCI config space is often
 * passed to the process calling this ioctl via file descriptor, so we
 * can't rely on access to that file.  We can check for permissions
 * on each of the BAR resource files, which is a pretty clear
 * indicator that the user has been granted access to the device.
 */
static int probe_sysfs_permissions(struct pci_dev *dev)
{
#ifdef CONFIG_SYSFS
        int i;
        bool bar_found = false;

        for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
                char *kpath, *syspath;
                struct path path;
                struct inode *inode;
                int r;

                if (!pci_resource_len(dev, i))
                        continue;

                kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
                if (!kpath)
                        return -ENOMEM;

                /* Per sysfs-rules, sysfs is always at /sys */
                syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
                kfree(kpath);
                if (!syspath)
                        return -ENOMEM;

                r = kern_path(syspath, LOOKUP_FOLLOW, &path);
                kfree(syspath);
                if (r)
                        return r;

                inode = path.dentry->d_inode;

                r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                path_put(&path);
                if (r)
                        return r;

                bar_found = true;
        }

        /* If no resources, probably something special */
        if (!bar_found)
                return -EPERM;

        return 0;
#else
        return -EINVAL; /* No way to control the device without sysfs */
#endif
}

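/*
 * KVM_ASSIGN_PCI_DEVICE handler: look up the PCI device, verify the
 * caller may use it, claim and reset it, save its config state and
 * attach it to the VM's IOMMU domain.
 */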
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                                      struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0, idx;
        struct kvm_assigned_dev_kernel *match;
        struct pci_dev *dev;

        if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        idx = srcu_read_lock(&kvm->srcu);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (match) {
                /* device already assigned */
                r = -EEXIST;
                goto out;
        }

        match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
        if (match == NULL) {
                printk(KERN_INFO "%s: Couldn't allocate memory\n",
                       __func__);
                r = -ENOMEM;
                goto out;
        }
        dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
                                   assigned_dev->busnr,
                                   assigned_dev->devfn);
        if (!dev) {
                printk(KERN_INFO "%s: host device not found\n", __func__);
                r = -EINVAL;
                goto out_free;
        }

        /* Don't allow bridges to be assigned */
        if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
                r = -EPERM;
                goto out_put;
        }

        r = probe_sysfs_permissions(dev);
        if (r)
                goto out_put;

        if (pci_enable_device(dev)) {
                printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
                r = -EBUSY;
                goto out_put;
        }
        r = pci_request_regions(dev, "kvm_assigned_device");
        if (r) {
                printk(KERN_INFO "%s: Could not get access to device regions\n",
                       __func__);
                goto out_disable;
        }

        pci_reset_function(dev);
        pci_save_state(dev);
        match->pci_saved_state = pci_store_saved_state(dev);
        if (!match->pci_saved_state)
                printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
                       __func__, dev_name(&dev->dev));

        if (!pci_intx_mask_supported(dev))
                assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;

        match->assigned_dev_id = assigned_dev->assigned_dev_id;
        match->host_segnr = assigned_dev->segnr;
        match->host_busnr = assigned_dev->busnr;
        match->host_devfn = assigned_dev->devfn;
        match->flags = assigned_dev->flags;
        match->dev = dev;
        spin_lock_init(&match->intx_lock);
        spin_lock_init(&match->intx_mask_lock);
        match->irq_source_id = -1;
        match->kvm = kvm;
        match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;

        list_add(&match->list, &kvm->arch.assigned_dev_head);

        if (!kvm->arch.iommu_domain) {
                r = kvm_iommu_map_guest(kvm);
                if (r)
                        goto out_list_del;
        }
        r = kvm_assign_device(kvm, match);
        if (r)
                goto out_list_del;

out:
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
out_list_del:
        if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
                printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
                       __func__, dev_name(&dev->dev));
        list_del(&match->list);
        pci_release_regions(dev);
out_disable:
        pci_disable_device(dev);
out_put:
        pci_dev_put(dev);
out_free:
        kfree(match);
        srcu_read_unlock(&kvm->srcu, idx);
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
                struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                printk(KERN_INFO "%s: device hasn't been assigned before, "
                  "so cannot be deassigned\n", __func__);
                r = -EINVAL;
                goto out;
        }

        kvm_deassign_device(kvm, match);

        kvm_free_assigned_device(kvm, match);

out:
        mutex_unlock(&kvm->lock);
        return r;
}

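/*
 * MSI-X is configured in two steps: KVM_ASSIGN_SET_MSIX_NR fixes the
 * number of entries (once per device) and allocates the host/guest
 * entry arrays, then KVM_ASSIGN_SET_MSIX_ENTRY fills in each entry's
 * MSI-X table index and guest GSI.
 */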
#ifdef __KVM_HAVE_MSIX
static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
                                    struct kvm_assigned_msix_nr *entry_nr)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      entry_nr->assigned_dev_id);
        if (!adev) {
                r = -EINVAL;
                goto msix_nr_out;
        }

        if (adev->entries_nr == 0) {
                adev->entries_nr = entry_nr->entry_nr;
                if (adev->entries_nr == 0 ||
                    adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
                        r = -EINVAL;
                        goto msix_nr_out;
                }

                adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
                                                entry_nr->entry_nr,
                                                GFP_KERNEL);
                if (!adev->host_msix_entries) {
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
                adev->guest_msix_entries =
                        kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
                                GFP_KERNEL);
                if (!adev->guest_msix_entries) {
                        kfree(adev->host_msix_entries);
                        r = -ENOMEM;
                        goto msix_nr_out;
                }
        } else /* Not allowed to set the MSI-X entry count twice */
                r = -EINVAL;
msix_nr_out:
        mutex_unlock(&kvm->lock);
        return r;
}

static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
                                       struct kvm_assigned_msix_entry *entry)
{
        int r = 0, i;
        struct kvm_assigned_dev_kernel *adev;

        mutex_lock(&kvm->lock);

        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      entry->assigned_dev_id);

        if (!adev) {
                r = -EINVAL;
                goto msix_entry_out;
        }

        for (i = 0; i < adev->entries_nr; i++)
                if (adev->guest_msix_entries[i].vector == 0 ||
                    adev->guest_msix_entries[i].entry == entry->entry) {
                        adev->guest_msix_entries[i].entry = entry->entry;
                        adev->guest_msix_entries[i].vector = entry->gsi;
                        adev->host_msix_entries[i].entry = entry->entry;
                        break;
                }
        if (i == adev->entries_nr) {
                r = -ENOSPC;
                goto msix_entry_out;
        }

msix_entry_out:
        mutex_unlock(&kvm->lock);

        return r;
}
#endif

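/*
 * KVM_ASSIGN_SET_INTX_MASK handler: userspace mirrors the guest's
 * virtual INTx mask bit here. Masking deasserts the guest line;
 * unmasking re-enables the host IRQ if we had disabled it.
 */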
static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
                struct kvm_assigned_pci_dev *assigned_dev)
{
        int r = 0;
        struct kvm_assigned_dev_kernel *match;

        mutex_lock(&kvm->lock);

        match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
                                      assigned_dev->assigned_dev_id);
        if (!match) {
                r = -ENODEV;
                goto out;
        }

        spin_lock(&match->intx_mask_lock);

        match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
        match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;

        if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
                if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
                        kvm_set_irq(match->kvm, match->irq_source_id,
                                    match->guest_irq, 0);
                        /*
                         * Masking at hardware-level is performed on demand,
                         * i.e. when an IRQ actually arrives at the host.
                         */
                } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
                        /*
                         * Unmask the IRQ line if required. Unmasking at
                         * device level will be performed by user space.
                         */
                        spin_lock_irq(&match->intx_lock);
                        if (match->host_irq_disabled) {
                                enable_irq(match->host_irq);
                                match->host_irq_disabled = false;
                        }
                        spin_unlock_irq(&match->intx_lock);
                }
        }

        spin_unlock(&match->intx_mask_lock);

out:
        mutex_unlock(&kvm->lock);
        return r;
}

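/*
 * Dispatcher for the legacy device assignment ioctls on a VM fd.
 *
 * A typical userspace sequence looks roughly like this (a sketch only;
 * "vm_fd", "id", "seg"/"bus"/"devfn" and "gsi" are the caller's own
 * values, struct layouts per <linux/kvm.h>):
 *
 *      struct kvm_assigned_pci_dev dev = {
 *              .assigned_dev_id = id,
 *              .segnr = seg, .busnr = bus, .devfn = devfn,
 *              .flags = KVM_DEV_ASSIGN_ENABLE_IOMMU,
 *      };
 *      ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev);
 *
 *      struct kvm_assigned_irq irq = {
 *              .assigned_dev_id = id,
 *              .guest_irq = gsi,
 *              .flags = KVM_DEV_IRQ_HOST_INTX | KVM_DEV_IRQ_GUEST_INTX,
 *      };
 *      ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
 *
 * followed later by KVM_DEASSIGN_DEV_IRQ and KVM_DEASSIGN_PCI_DEVICE.
 */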
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
                                  unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_ASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_IRQ: {
                r = -EOPNOTSUPP;
                break;
        }
        case KVM_ASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_DEV_IRQ: {
                struct kvm_assigned_irq assigned_irq;

                r = -EFAULT;
                if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
                        goto out;
                r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
                if (r)
                        goto out;
                break;
        }
        case KVM_DEASSIGN_PCI_DEVICE: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
                if (r)
                        goto out;
                break;
        }
#ifdef KVM_CAP_IRQ_ROUTING
        case KVM_SET_GSI_ROUTING: {
                struct kvm_irq_routing routing;
                struct kvm_irq_routing __user *urouting;
                struct kvm_irq_routing_entry *entries;

                r = -EFAULT;
                if (copy_from_user(&routing, argp, sizeof(routing)))
                        goto out;
                r = -EINVAL;
                if (routing.nr >= KVM_MAX_IRQ_ROUTES)
                        goto out;
                if (routing.flags)
                        goto out;
                r = -ENOMEM;
                entries = vmalloc(routing.nr * sizeof(*entries));
                if (!entries)
                        goto out;
                r = -EFAULT;
                urouting = argp;
                if (copy_from_user(entries, urouting->entries,
                                   routing.nr * sizeof(*entries)))
                        goto out_free_irq_routing;
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
        out_free_irq_routing:
                vfree(entries);
                break;
        }
#endif /* KVM_CAP_IRQ_ROUTING */
#ifdef __KVM_HAVE_MSIX
        case KVM_ASSIGN_SET_MSIX_NR: {
                struct kvm_assigned_msix_nr entry_nr;
                r = -EFAULT;
                if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
                        goto out;
                r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
                if (r)
                        goto out;
                break;
        }
        case KVM_ASSIGN_SET_MSIX_ENTRY: {
                struct kvm_assigned_msix_entry entry;
                r = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof entry))
                        goto out;
                r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
                if (r)
                        goto out;
                break;
        }
#endif
        case KVM_ASSIGN_SET_INTX_MASK: {
                struct kvm_assigned_pci_dev assigned_dev;

                r = -EFAULT;
                if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
                        goto out;
                r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
                break;
        }
        default:
                r = -ENOTTY;
                break;
        }
out:
        return r;
}