linux/drivers/s390/crypto/vfio_ap_ops.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Adjunct processor matrix VFIO device driver callbacks.
   4 *
   5 * Copyright IBM Corp. 2018
   6 *
   7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
   8 *            Halil Pasic <pasic@linux.ibm.com>
   9 *            Pierre Morel <pmorel@linux.ibm.com>
  10 */
#include <linux/string.h>
#include <linux/vfio.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/bitops.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <asm/kvm.h>
#include <asm/zcrypt.h>

#include "vfio_ap_private.h"
  23
  24#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
  25#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
  26
  27static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
  28static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
  29
  30static int match_apqn(struct device *dev, const void *data)
  31{
  32        struct vfio_ap_queue *q = dev_get_drvdata(dev);
  33
  34        return (q->apqn == *(int *)(data)) ? 1 : 0;
  35}
  36
  37/**
  38 * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
  39 * @matrix_mdev: the associated mediated matrix
  40 * @apqn: The queue APQN
  41 *
  42 * Retrieve a queue with a specific APQN from the list of the
  43 * devices of the vfio_ap_drv.
  44 * Verify that the APID and the APQI are set in the matrix.
  45 *
  46 * Returns the pointer to the associated vfio_ap_queue
  47 */
  48static struct vfio_ap_queue *vfio_ap_get_queue(
  49                                        struct ap_matrix_mdev *matrix_mdev,
  50                                        int apqn)
  51{
  52        struct vfio_ap_queue *q;
  53
  54        if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
  55                return NULL;
  56        if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
  57                return NULL;
  58
  59        q = vfio_ap_find_queue(apqn);
  60        if (q)
  61                q->matrix_mdev = matrix_mdev;
  62
  63        return q;
  64}
  65
  66/**
  67 * vfio_ap_wait_for_irqclear
  68 * @apqn: The AP Queue number
  69 *
  70 * Checks the IRQ bit for the status of this APQN using ap_tapq.
  71 * Returns if the ap_tapq function succeeded and the bit is clear.
  72 * Returns if ap_tapq function failed with invalid, deconfigured or
  73 * checkstopped AP.
  74 * Otherwise retries up to 5 times after waiting 20ms.
  75 *
  76 */
  77static void vfio_ap_wait_for_irqclear(int apqn)
  78{
  79        struct ap_queue_status status;
  80        int retry = 5;
  81
  82        do {
  83                status = ap_tapq(apqn, NULL);
  84                switch (status.response_code) {
  85                case AP_RESPONSE_NORMAL:
  86                case AP_RESPONSE_RESET_IN_PROGRESS:
  87                        if (!status.irq_enabled)
  88                                return;
  89                        fallthrough;
  90                case AP_RESPONSE_BUSY:
  91                        msleep(20);
  92                        break;
  93                case AP_RESPONSE_Q_NOT_AVAIL:
  94                case AP_RESPONSE_DECONFIGURED:
  95                case AP_RESPONSE_CHECKSTOPPED:
  96                default:
  97                        WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
  98                                  status.response_code, apqn);
  99                        return;
 100                }
 101        } while (--retry);
 102
 103        WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
 104                  __func__, status.response_code, apqn);
 105}
 106
 107/**
 108 * vfio_ap_free_aqic_resources
 109 * @q: The vfio_ap_queue
 110 *
 111 * Unregisters the ISC in the GIB when the saved ISC not invalid.
 112 * Unpin the guest's page holding the NIB when it exist.
 113 * Reset the saved_pfn and saved_isc to invalid values.
 114 *
 115 */
 116static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
 117{
 118        if (!q)
 119                return;
 120        if (q->saved_isc != VFIO_AP_ISC_INVALID &&
 121            !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
 122                kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
 123                q->saved_isc = VFIO_AP_ISC_INVALID;
 124        }
 125        if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
 126                vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
 127                                 &q->saved_pfn, 1);
 128                q->saved_pfn = 0;
 129        }
 130}
 131
 132/**
 133 * vfio_ap_irq_disable
 134 * @q: The vfio_ap_queue
 135 *
 136 * Uses ap_aqic to disable the interruption and in case of success, reset
 137 * in progress or IRQ disable command already proceeded: calls
 138 * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
 139 * and calls vfio_ap_free_aqic_resources() to free the resources associated
 140 * with the AP interrupt handling.
 141 *
 142 * In the case the AP is busy, or a reset is in progress,
 143 * retries after 20ms, up to 5 times.
 144 *
 145 * Returns if ap_aqic function failed with invalid, deconfigured or
 146 * checkstopped AP.
 147 */
 148static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
 149{
 150        struct ap_qirq_ctrl aqic_gisa = {};
 151        struct ap_queue_status status;
 152        int retries = 5;
 153
 154        do {
 155                status = ap_aqic(q->apqn, aqic_gisa, NULL);
 156                switch (status.response_code) {
 157                case AP_RESPONSE_OTHERWISE_CHANGED:
 158                case AP_RESPONSE_NORMAL:
 159                        vfio_ap_wait_for_irqclear(q->apqn);
 160                        goto end_free;
 161                case AP_RESPONSE_RESET_IN_PROGRESS:
 162                case AP_RESPONSE_BUSY:
 163                        msleep(20);
 164                        break;
 165                case AP_RESPONSE_Q_NOT_AVAIL:
 166                case AP_RESPONSE_DECONFIGURED:
 167                case AP_RESPONSE_CHECKSTOPPED:
 168                case AP_RESPONSE_INVALID_ADDRESS:
 169                default:
 170                        /* All cases in default means AP not operational */
 171                        WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
 172                                  status.response_code);
 173                        goto end_free;
 174                }
 175        } while (retries--);
 176
 177        WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
 178                  status.response_code);
 179end_free:
 180        vfio_ap_free_aqic_resources(q);
 181        q->matrix_mdev = NULL;
 182        return status;
 183}
 184
 185/**
 186 * vfio_ap_setirq: Enable Interruption for a APQN
 187 *
 188 * @dev: the device associated with the ap_queue
 189 * @q:   the vfio_ap_queue holding AQIC parameters
 190 *
 191 * Pin the NIB saved in *q
 192 * Register the guest ISC to GIB interface and retrieve the
 193 * host ISC to issue the host side PQAP/AQIC
 194 *
 195 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
 196 * vfio_pin_pages failed.
 197 *
 198 * Otherwise return the ap_queue_status returned by the ap_aqic(),
 199 * all retry handling will be done by the guest.
 200 */
 201static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 202                                                 int isc,
 203                                                 unsigned long nib)
 204{
 205        struct ap_qirq_ctrl aqic_gisa = {};
 206        struct ap_queue_status status = {};
 207        struct kvm_s390_gisa *gisa;
 208        struct kvm *kvm;
 209        unsigned long h_nib, g_pfn, h_pfn;
 210        int ret;
 211
 212        g_pfn = nib >> PAGE_SHIFT;
 213        ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
 214                             IOMMU_READ | IOMMU_WRITE, &h_pfn);
 215        switch (ret) {
 216        case 1:
 217                break;
 218        default:
 219                status.response_code = AP_RESPONSE_INVALID_ADDRESS;
 220                return status;
 221        }
 222
 223        kvm = q->matrix_mdev->kvm;
 224        gisa = kvm->arch.gisa_int.origin;
 225
 226        h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
 227        aqic_gisa.gisc = isc;
 228        aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
 229        aqic_gisa.ir = 1;
 230        aqic_gisa.gisa = (uint64_t)gisa >> 4;
 231
 232        status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
 233        switch (status.response_code) {
 234        case AP_RESPONSE_NORMAL:
 235                /* See if we did clear older IRQ configuration */
 236                vfio_ap_free_aqic_resources(q);
 237                q->saved_pfn = g_pfn;
 238                q->saved_isc = isc;
 239                break;
 240        case AP_RESPONSE_OTHERWISE_CHANGED:
 241                /* We could not modify IRQ setings: clear new configuration */
 242                vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
 243                kvm_s390_gisc_unregister(kvm, isc);
 244                break;
 245        default:
 246                pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
 247                        status.response_code);
 248                vfio_ap_irq_disable(q);
 249                break;
 250        }
 251
 252        return status;
 253}
 254
 255/**
 256 * handle_pqap: PQAP instruction callback
 257 *
 258 * @vcpu: The vcpu on which we received the PQAP instruction
 259 *
 260 * Get the general register contents to initialize internal variables.
 261 * REG[0]: APQN
 262 * REG[1]: IR and ISC
 263 * REG[2]: NIB
 264 *
 265 * Response.status may be set to following Response Code:
 266 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
 267 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
 268 * - AP_RESPONSE_NORMAL (0) : in case of successs
 269 *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
 270 * We take the matrix_dev lock to ensure serialization on queues and
 271 * mediated device access.
 272 *
 273 * Return 0 if we could handle the request inside KVM.
 274 * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
 275 */
 276static int handle_pqap(struct kvm_vcpu *vcpu)
 277{
 278        uint64_t status;
 279        uint16_t apqn;
 280        struct vfio_ap_queue *q;
 281        struct ap_queue_status qstatus = {
 282                               .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
 283        struct ap_matrix_mdev *matrix_mdev;
 284
 285        /* If we do not use the AIV facility just go to userland */
 286        if (!(vcpu->arch.sie_block->eca & ECA_AIV))
 287                return -EOPNOTSUPP;
 288
 289        apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
 290        mutex_lock(&matrix_dev->lock);
 291
 292        if (!vcpu->kvm->arch.crypto.pqap_hook)
 293                goto out_unlock;
 294        matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
 295                                   struct ap_matrix_mdev, pqap_hook);
 296
 297        /*
 298         * If the KVM pointer is in the process of being set, wait until the
 299         * process has completed.
 300         */
 301        wait_event_cmd(matrix_mdev->wait_for_kvm,
 302                       !matrix_mdev->kvm_busy,
 303                       mutex_unlock(&matrix_dev->lock),
 304                       mutex_lock(&matrix_dev->lock));
 305
 306        /* If the there is no guest using the mdev, there is nothing to do */
 307        if (!matrix_mdev->kvm)
 308                goto out_unlock;
 309
 310        q = vfio_ap_get_queue(matrix_mdev, apqn);
 311        if (!q)
 312                goto out_unlock;
 313
 314        status = vcpu->run->s.regs.gprs[1];
 315
 316        /* If IR bit(16) is set we enable the interrupt */
 317        if ((status >> (63 - 16)) & 0x01)
 318                qstatus = vfio_ap_irq_enable(q, status & 0x07,
 319                                             vcpu->run->s.regs.gprs[2]);
 320        else
 321                qstatus = vfio_ap_irq_disable(q);
 322
 323out_unlock:
 324        memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
 325        vcpu->run->s.regs.gprs[1] >>= 32;
 326        mutex_unlock(&matrix_dev->lock);
 327        return 0;
 328}
 329
 330static void vfio_ap_matrix_init(struct ap_config_info *info,
 331                                struct ap_matrix *matrix)
 332{
 333        matrix->apm_max = info->apxa ? info->Na : 63;
 334        matrix->aqm_max = info->apxa ? info->Nd : 15;
 335        matrix->adm_max = info->apxa ? info->Nd : 15;
 336}
 337
 338static int vfio_ap_mdev_create(struct mdev_device *mdev)
 339{
 340        struct ap_matrix_mdev *matrix_mdev;
 341
 342        if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
 343                return -EPERM;
 344
 345        matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
 346        if (!matrix_mdev) {
 347                atomic_inc(&matrix_dev->available_instances);
 348                return -ENOMEM;
 349        }
 350
 351        matrix_mdev->mdev = mdev;
 352        vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
 353        init_waitqueue_head(&matrix_mdev->wait_for_kvm);
 354        mdev_set_drvdata(mdev, matrix_mdev);
 355        matrix_mdev->pqap_hook.hook = handle_pqap;
 356        matrix_mdev->pqap_hook.owner = THIS_MODULE;
 357        mutex_lock(&matrix_dev->lock);
 358        list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
 359        mutex_unlock(&matrix_dev->lock);
 360
 361        return 0;
 362}
 363
 364static int vfio_ap_mdev_remove(struct mdev_device *mdev)
 365{
 366        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 367
 368        mutex_lock(&matrix_dev->lock);
 369        vfio_ap_mdev_reset_queues(mdev);
 370        list_del(&matrix_mdev->node);
 371        kfree(matrix_mdev);
 372        mdev_set_drvdata(mdev, NULL);
 373        atomic_inc(&matrix_dev->available_instances);
 374        mutex_unlock(&matrix_dev->lock);
 375
 376        return 0;
 377}
 378
 379static ssize_t name_show(struct mdev_type *mtype,
 380                         struct mdev_type_attribute *attr, char *buf)
 381{
 382        return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
 383}
 384
 385static MDEV_TYPE_ATTR_RO(name);
 386
 387static ssize_t available_instances_show(struct mdev_type *mtype,
 388                                        struct mdev_type_attribute *attr,
 389                                        char *buf)
 390{
 391        return sprintf(buf, "%d\n",
 392                       atomic_read(&matrix_dev->available_instances));
 393}
 394
 395static MDEV_TYPE_ATTR_RO(available_instances);
 396
 397static ssize_t device_api_show(struct mdev_type *mtype,
 398                               struct mdev_type_attribute *attr, char *buf)
 399{
 400        return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
 401}
 402
 403static MDEV_TYPE_ATTR_RO(device_api);
 404
 405static struct attribute *vfio_ap_mdev_type_attrs[] = {
 406        &mdev_type_attr_name.attr,
 407        &mdev_type_attr_device_api.attr,
 408        &mdev_type_attr_available_instances.attr,
 409        NULL,
 410};
 411
 412static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
 413        .name = VFIO_AP_MDEV_TYPE_HWVIRT,
 414        .attrs = vfio_ap_mdev_type_attrs,
 415};
 416
 417static struct attribute_group *vfio_ap_mdev_type_groups[] = {
 418        &vfio_ap_mdev_hwvirt_type_group,
 419        NULL,
 420};
 421
 422struct vfio_ap_queue_reserved {
 423        unsigned long *apid;
 424        unsigned long *apqi;
 425        bool reserved;
 426};
 427
 428/**
 429 * vfio_ap_has_queue
 430 *
 431 * @dev: an AP queue device
 432 * @data: a struct vfio_ap_queue_reserved reference
 433 *
 434 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
 435 * apid or apqi specified in @data:
 436 *
 437 * - If @data contains both an apid and apqi value, then @data will be flagged
 438 *   as reserved if the APID and APQI fields for the AP queue device matches
 439 *
 440 * - If @data contains only an apid value, @data will be flagged as
 441 *   reserved if the APID field in the AP queue device matches
 442 *
 443 * - If @data contains only an apqi value, @data will be flagged as
 444 *   reserved if the APQI field in the AP queue device matches
 445 *
 446 * Returns 0 to indicate the input to function succeeded. Returns -EINVAL if
 447 * @data does not contain either an apid or apqi.
 448 */
 449static int vfio_ap_has_queue(struct device *dev, void *data)
 450{
 451        struct vfio_ap_queue_reserved *qres = data;
 452        struct ap_queue *ap_queue = to_ap_queue(dev);
 453        ap_qid_t qid;
 454        unsigned long id;
 455
 456        if (qres->apid && qres->apqi) {
 457                qid = AP_MKQID(*qres->apid, *qres->apqi);
 458                if (qid == ap_queue->qid)
 459                        qres->reserved = true;
 460        } else if (qres->apid && !qres->apqi) {
 461                id = AP_QID_CARD(ap_queue->qid);
 462                if (id == *qres->apid)
 463                        qres->reserved = true;
 464        } else if (!qres->apid && qres->apqi) {
 465                id = AP_QID_QUEUE(ap_queue->qid);
 466                if (id == *qres->apqi)
 467                        qres->reserved = true;
 468        } else {
 469                return -EINVAL;
 470        }
 471
 472        return 0;
 473}
 474
 475/**
 476 * vfio_ap_verify_queue_reserved
 477 *
 478 * @matrix_dev: a mediated matrix device
 479 * @apid: an AP adapter ID
 480 * @apqi: an AP queue index
 481 *
 482 * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
 483 * driver according to the following rules:
 484 *
 485 * - If both @apid and @apqi are not NULL, then there must be an AP queue
 486 *   device bound to the vfio_ap driver with the APQN identified by @apid and
 487 *   @apqi
 488 *
 489 * - If only @apid is not NULL, then there must be an AP queue device bound
 490 *   to the vfio_ap driver with an APQN containing @apid
 491 *
 492 * - If only @apqi is not NULL, then there must be an AP queue device bound
 493 *   to the vfio_ap driver with an APQN containing @apqi
 494 *
 495 * Returns 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
 496 */
 497static int vfio_ap_verify_queue_reserved(unsigned long *apid,
 498                                         unsigned long *apqi)
 499{
 500        int ret;
 501        struct vfio_ap_queue_reserved qres;
 502
 503        qres.apid = apid;
 504        qres.apqi = apqi;
 505        qres.reserved = false;
 506
 507        ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
 508                                     &qres, vfio_ap_has_queue);
 509        if (ret)
 510                return ret;
 511
 512        if (qres.reserved)
 513                return 0;
 514
 515        return -EADDRNOTAVAIL;
 516}
 517
 518static int
 519vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
 520                                             unsigned long apid)
 521{
 522        int ret;
 523        unsigned long apqi;
 524        unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;
 525
 526        if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
 527                return vfio_ap_verify_queue_reserved(&apid, NULL);
 528
 529        for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
 530                ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
 531                if (ret)
 532                        return ret;
 533        }
 534
 535        return 0;
 536}
 537
 538/**
 539 * vfio_ap_mdev_verify_no_sharing
 540 *
 541 * Verifies that the APQNs derived from the cross product of the AP adapter IDs
 542 * and AP queue indexes comprising the AP matrix are not configured for another
 543 * mediated device. AP queue sharing is not allowed.
 544 *
 545 * @matrix_mdev: the mediated matrix device
 546 *
 547 * Returns 0 if the APQNs are not shared, otherwise; returns -EADDRINUSE.
 548 */
 549static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
 550{
 551        struct ap_matrix_mdev *lstdev;
 552        DECLARE_BITMAP(apm, AP_DEVICES);
 553        DECLARE_BITMAP(aqm, AP_DOMAINS);
 554
 555        list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
 556                if (matrix_mdev == lstdev)
 557                        continue;
 558
 559                memset(apm, 0, sizeof(apm));
 560                memset(aqm, 0, sizeof(aqm));
 561
 562                /*
 563                 * We work on full longs, as we can only exclude the leftover
 564                 * bits in non-inverse order. The leftover is all zeros.
 565                 */
 566                if (!bitmap_and(apm, matrix_mdev->matrix.apm,
 567                                lstdev->matrix.apm, AP_DEVICES))
 568                        continue;
 569
 570                if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
 571                                lstdev->matrix.aqm, AP_DOMAINS))
 572                        continue;
 573
 574                return -EADDRINUSE;
 575        }
 576
 577        return 0;
 578}
 579
 580/**
 581 * assign_adapter_store
 582 *
 583 * @dev:        the matrix device
 584 * @attr:       the mediated matrix device's assign_adapter attribute
 585 * @buf:        a buffer containing the AP adapter number (APID) to
 586 *              be assigned
 587 * @count:      the number of bytes in @buf
 588 *
 589 * Parses the APID from @buf and sets the corresponding bit in the mediated
 590 * matrix device's APM.
 591 *
 592 * Returns the number of bytes processed if the APID is valid; otherwise,
 593 * returns one of the following errors:
 594 *
 595 *      1. -EINVAL
 596 *         The APID is not a valid number
 597 *
 598 *      2. -ENODEV
 599 *         The APID exceeds the maximum value configured for the system
 600 *
 601 *      3. -EADDRNOTAVAIL
 602 *         An APQN derived from the cross product of the APID being assigned
 603 *         and the APQIs previously assigned is not bound to the vfio_ap device
 604 *         driver; or, if no APQIs have yet been assigned, the APID is not
 605 *         contained in an APQN bound to the vfio_ap device driver.
 606 *
 607 *      4. -EADDRINUSE
 608 *         An APQN derived from the cross product of the APID being assigned
 609 *         and the APQIs previously assigned is being used by another mediated
 610 *         matrix device
 611 */
 612static ssize_t assign_adapter_store(struct device *dev,
 613                                    struct device_attribute *attr,
 614                                    const char *buf, size_t count)
 615{
 616        int ret;
 617        unsigned long apid;
 618        struct mdev_device *mdev = mdev_from_dev(dev);
 619        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 620
 621        mutex_lock(&matrix_dev->lock);
 622
 623        /*
 624         * If the KVM pointer is in flux or the guest is running, disallow
 625         * un-assignment of adapter
 626         */
 627        if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
 628                ret = -EBUSY;
 629                goto done;
 630        }
 631
 632        ret = kstrtoul(buf, 0, &apid);
 633        if (ret)
 634                goto done;
 635
 636        if (apid > matrix_mdev->matrix.apm_max) {
 637                ret = -ENODEV;
 638                goto done;
 639        }
 640
 641        /*
 642         * Set the bit in the AP mask (APM) corresponding to the AP adapter
 643         * number (APID). The bits in the mask, from most significant to least
 644         * significant bit, correspond to APIDs 0-255.
 645         */
 646        ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
 647        if (ret)
 648                goto done;
 649
 650        set_bit_inv(apid, matrix_mdev->matrix.apm);
 651
 652        ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
 653        if (ret)
 654                goto share_err;
 655
 656        ret = count;
 657        goto done;
 658
 659share_err:
 660        clear_bit_inv(apid, matrix_mdev->matrix.apm);
 661done:
 662        mutex_unlock(&matrix_dev->lock);
 663
 664        return ret;
 665}
 666static DEVICE_ATTR_WO(assign_adapter);
 667
 668/**
 669 * unassign_adapter_store
 670 *
 671 * @dev:        the matrix device
 672 * @attr:       the mediated matrix device's unassign_adapter attribute
 673 * @buf:        a buffer containing the adapter number (APID) to be unassigned
 674 * @count:      the number of bytes in @buf
 675 *
 676 * Parses the APID from @buf and clears the corresponding bit in the mediated
 677 * matrix device's APM.
 678 *
 679 * Returns the number of bytes processed if the APID is valid; otherwise,
 680 * returns one of the following errors:
 681 *      -EINVAL if the APID is not a number
 682 *      -ENODEV if the APID it exceeds the maximum value configured for the
 683 *              system
 684 */
 685static ssize_t unassign_adapter_store(struct device *dev,
 686                                      struct device_attribute *attr,
 687                                      const char *buf, size_t count)
 688{
 689        int ret;
 690        unsigned long apid;
 691        struct mdev_device *mdev = mdev_from_dev(dev);
 692        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 693
 694        mutex_lock(&matrix_dev->lock);
 695
 696        /*
 697         * If the KVM pointer is in flux or the guest is running, disallow
 698         * un-assignment of adapter
 699         */
 700        if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
 701                ret = -EBUSY;
 702                goto done;
 703        }
 704
 705        ret = kstrtoul(buf, 0, &apid);
 706        if (ret)
 707                goto done;
 708
 709        if (apid > matrix_mdev->matrix.apm_max) {
 710                ret = -ENODEV;
 711                goto done;
 712        }
 713
 714        clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
 715        ret = count;
 716done:
 717        mutex_unlock(&matrix_dev->lock);
 718        return ret;
 719}
 720static DEVICE_ATTR_WO(unassign_adapter);
 721
 722static int
 723vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
 724                                             unsigned long apqi)
 725{
 726        int ret;
 727        unsigned long apid;
 728        unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
 729
 730        if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
 731                return vfio_ap_verify_queue_reserved(NULL, &apqi);
 732
 733        for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
 734                ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
 735                if (ret)
 736                        return ret;
 737        }
 738
 739        return 0;
 740}
 741
 742/**
 743 * assign_domain_store
 744 *
 745 * @dev:        the matrix device
 746 * @attr:       the mediated matrix device's assign_domain attribute
 747 * @buf:        a buffer containing the AP queue index (APQI) of the domain to
 748 *              be assigned
 749 * @count:      the number of bytes in @buf
 750 *
 751 * Parses the APQI from @buf and sets the corresponding bit in the mediated
 752 * matrix device's AQM.
 753 *
 754 * Returns the number of bytes processed if the APQI is valid; otherwise returns
 755 * one of the following errors:
 756 *
 757 *      1. -EINVAL
 758 *         The APQI is not a valid number
 759 *
 760 *      2. -ENODEV
 761 *         The APQI exceeds the maximum value configured for the system
 762 *
 763 *      3. -EADDRNOTAVAIL
 764 *         An APQN derived from the cross product of the APQI being assigned
 765 *         and the APIDs previously assigned is not bound to the vfio_ap device
 766 *         driver; or, if no APIDs have yet been assigned, the APQI is not
 767 *         contained in an APQN bound to the vfio_ap device driver.
 768 *
 769 *      4. -EADDRINUSE
 770 *         An APQN derived from the cross product of the APQI being assigned
 771 *         and the APIDs previously assigned is being used by another mediated
 772 *         matrix device
 773 */
 774static ssize_t assign_domain_store(struct device *dev,
 775                                   struct device_attribute *attr,
 776                                   const char *buf, size_t count)
 777{
 778        int ret;
 779        unsigned long apqi;
 780        struct mdev_device *mdev = mdev_from_dev(dev);
 781        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 782        unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
 783
 784        mutex_lock(&matrix_dev->lock);
 785
 786        /*
 787         * If the KVM pointer is in flux or the guest is running, disallow
 788         * assignment of domain
 789         */
 790        if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
 791                ret = -EBUSY;
 792                goto done;
 793        }
 794
 795        ret = kstrtoul(buf, 0, &apqi);
 796        if (ret)
 797                goto done;
 798        if (apqi > max_apqi) {
 799                ret = -ENODEV;
 800                goto done;
 801        }
 802
 803        ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
 804        if (ret)
 805                goto done;
 806
 807        set_bit_inv(apqi, matrix_mdev->matrix.aqm);
 808
 809        ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
 810        if (ret)
 811                goto share_err;
 812
 813        ret = count;
 814        goto done;
 815
 816share_err:
 817        clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
 818done:
 819        mutex_unlock(&matrix_dev->lock);
 820
 821        return ret;
 822}
 823static DEVICE_ATTR_WO(assign_domain);
 824
 825
 826/**
 827 * unassign_domain_store
 828 *
 829 * @dev:        the matrix device
 830 * @attr:       the mediated matrix device's unassign_domain attribute
 831 * @buf:        a buffer containing the AP queue index (APQI) of the domain to
 832 *              be unassigned
 833 * @count:      the number of bytes in @buf
 834 *
 835 * Parses the APQI from @buf and clears the corresponding bit in the
 836 * mediated matrix device's AQM.
 837 *
 838 * Returns the number of bytes processed if the APQI is valid; otherwise,
 839 * returns one of the following errors:
 840 *      -EINVAL if the APQI is not a number
 841 *      -ENODEV if the APQI exceeds the maximum value configured for the system
 842 */
 843static ssize_t unassign_domain_store(struct device *dev,
 844                                     struct device_attribute *attr,
 845                                     const char *buf, size_t count)
 846{
 847        int ret;
 848        unsigned long apqi;
 849        struct mdev_device *mdev = mdev_from_dev(dev);
 850        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 851
 852        mutex_lock(&matrix_dev->lock);
 853
 854        /*
 855         * If the KVM pointer is in flux or the guest is running, disallow
 856         * un-assignment of domain
 857         */
 858        if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
 859                ret = -EBUSY;
 860                goto done;
 861        }
 862
 863        ret = kstrtoul(buf, 0, &apqi);
 864        if (ret)
 865                goto done;
 866
 867        if (apqi > matrix_mdev->matrix.aqm_max) {
 868                ret = -ENODEV;
 869                goto done;
 870        }
 871
 872        clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
 873        ret = count;
 874
 875done:
 876        mutex_unlock(&matrix_dev->lock);
 877        return ret;
 878}
 879static DEVICE_ATTR_WO(unassign_domain);
 880
 881/**
 882 * assign_control_domain_store
 883 *
 884 * @dev:        the matrix device
 885 * @attr:       the mediated matrix device's assign_control_domain attribute
 886 * @buf:        a buffer containing the domain ID to be assigned
 887 * @count:      the number of bytes in @buf
 888 *
 889 * Parses the domain ID from @buf and sets the corresponding bit in the mediated
 890 * matrix device's ADM.
 891 *
 892 * Returns the number of bytes processed if the domain ID is valid; otherwise,
 893 * returns one of the following errors:
 894 *      -EINVAL if the ID is not a number
 895 *      -ENODEV if the ID exceeds the maximum value configured for the system
 896 */
 897static ssize_t assign_control_domain_store(struct device *dev,
 898                                           struct device_attribute *attr,
 899                                           const char *buf, size_t count)
 900{
 901        int ret;
 902        unsigned long id;
 903        struct mdev_device *mdev = mdev_from_dev(dev);
 904        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 905
 906        mutex_lock(&matrix_dev->lock);
 907
 908        /*
 909         * If the KVM pointer is in flux or the guest is running, disallow
 910         * assignment of control domain.
 911         */
 912        if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
 913                ret = -EBUSY;
 914                goto done;
 915        }
 916
 917        ret = kstrtoul(buf, 0, &id);
 918        if (ret)
 919                goto done;
 920
 921        if (id > matrix_mdev->matrix.adm_max) {
 922                ret = -ENODEV;
 923                goto done;
 924        }
 925
 926        /* Set the bit in the ADM (bitmask) corresponding to the AP control
 927         * domain number (id). The bits in the mask, from most significant to
 928         * least significant, correspond to IDs 0 up to the one less than the
 929         * number of control domains that can be assigned.
 930         */
 931        set_bit_inv(id, matrix_mdev->matrix.adm);
 932        ret = count;
 933done:
 934        mutex_unlock(&matrix_dev->lock);
 935        return ret;
 936}
 937static DEVICE_ATTR_WO(assign_control_domain);
 938
 939/**
 940 * unassign_control_domain_store
 941 *
 942 * @dev:        the matrix device
 943 * @attr:       the mediated matrix device's unassign_control_domain attribute
 944 * @buf:        a buffer containing the domain ID to be unassigned
 945 * @count:      the number of bytes in @buf
 946 *
 947 * Parses the domain ID from @buf and clears the corresponding bit in the
 948 * mediated matrix device's ADM.
 949 *
 950 * Returns the number of bytes processed if the domain ID is valid; otherwise,
 951 * returns one of the following errors:
 952 *      -EINVAL if the ID is not a number
 953 *      -ENODEV if the ID exceeds the maximum value configured for the system
 954 */
 955static ssize_t unassign_control_domain_store(struct device *dev,
 956                                             struct device_attribute *attr,
 957                                             const char *buf, size_t count)
 958{
 959        int ret;
 960        unsigned long domid;
 961        struct mdev_device *mdev = mdev_from_dev(dev);
 962        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 963        unsigned long max_domid =  matrix_mdev->matrix.adm_max;
 964
 965        mutex_lock(&matrix_dev->lock);
 966
 967        /*
 968         * If the KVM pointer is in flux or the guest is running, disallow
 969         * un-assignment of control domain.
 970         */
 971        if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
 972                ret = -EBUSY;
 973                goto done;
 974        }
 975
 976        ret = kstrtoul(buf, 0, &domid);
 977        if (ret)
 978                goto done;
 979        if (domid > max_domid) {
 980                ret = -ENODEV;
 981                goto done;
 982        }
 983
 984        clear_bit_inv(domid, matrix_mdev->matrix.adm);
 985        ret = count;
 986done:
 987        mutex_unlock(&matrix_dev->lock);
 988        return ret;
 989}
 990static DEVICE_ATTR_WO(unassign_control_domain);
 991
 992static ssize_t control_domains_show(struct device *dev,
 993                                    struct device_attribute *dev_attr,
 994                                    char *buf)
 995{
 996        unsigned long id;
 997        int nchars = 0;
 998        int n;
 999        char *bufpos = buf;
1000        struct mdev_device *mdev = mdev_from_dev(dev);
1001        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1002        unsigned long max_domid = matrix_mdev->matrix.adm_max;
1003
1004        mutex_lock(&matrix_dev->lock);
1005        for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
1006                n = sprintf(bufpos, "%04lx\n", id);
1007                bufpos += n;
1008                nchars += n;
1009        }
1010        mutex_unlock(&matrix_dev->lock);
1011
1012        return nchars;
1013}
1014static DEVICE_ATTR_RO(control_domains);
1015
1016static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
1017                           char *buf)
1018{
1019        struct mdev_device *mdev = mdev_from_dev(dev);
1020        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1021        char *bufpos = buf;
1022        unsigned long apid;
1023        unsigned long apqi;
1024        unsigned long apid1;
1025        unsigned long apqi1;
1026        unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
1027        unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
1028        int nchars = 0;
1029        int n;
1030
1031        apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1032        apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1033
1034        mutex_lock(&matrix_dev->lock);
1035
1036        if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1037                for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1038                        for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1039                                             naqm_bits) {
1040                                n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1041                                            apqi);
1042                                bufpos += n;
1043                                nchars += n;
1044                        }
1045                }
1046        } else if (apid1 < napm_bits) {
1047                for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1048                        n = sprintf(bufpos, "%02lx.\n", apid);
1049                        bufpos += n;
1050                        nchars += n;
1051                }
1052        } else if (apqi1 < naqm_bits) {
1053                for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1054                        n = sprintf(bufpos, ".%04lx\n", apqi);
1055                        bufpos += n;
1056                        nchars += n;
1057                }
1058        }
1059
1060        mutex_unlock(&matrix_dev->lock);
1061
1062        return nchars;
1063}
1064static DEVICE_ATTR_RO(matrix);
1065
1066static struct attribute *vfio_ap_mdev_attrs[] = {
1067        &dev_attr_assign_adapter.attr,
1068        &dev_attr_unassign_adapter.attr,
1069        &dev_attr_assign_domain.attr,
1070        &dev_attr_unassign_domain.attr,
1071        &dev_attr_assign_control_domain.attr,
1072        &dev_attr_unassign_control_domain.attr,
1073        &dev_attr_control_domains.attr,
1074        &dev_attr_matrix.attr,
1075        NULL,
1076};
1077
1078static struct attribute_group vfio_ap_mdev_attr_group = {
1079        .attrs = vfio_ap_mdev_attrs
1080};
1081
1082static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
1083        &vfio_ap_mdev_attr_group,
1084        NULL
1085};
1086
1087/**
1088 * vfio_ap_mdev_set_kvm
1089 *
1090 * @matrix_mdev: a mediated matrix device
1091 * @kvm: reference to KVM instance
1092 *
1093 * Sets all data for @matrix_mdev that are needed to manage AP resources
1094 * for the guest whose state is represented by @kvm.
1095 *
1096 * Note: The matrix_dev->lock must be taken prior to calling
1097 * this function; however, the lock will be temporarily released while the
1098 * guest's AP configuration is set to avoid a potential lockdep splat.
1099 * The kvm->lock is taken to set the guest's AP configuration which, under
1100 * certain circumstances, will result in a circular lock dependency if this is
1101 * done under the @matrix_mdev->lock.
1102 *
1103 * Return 0 if no other mediated matrix device has a reference to @kvm;
1104 * otherwise, returns an -EPERM.
1105 */
1106static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1107                                struct kvm *kvm)
1108{
1109        struct ap_matrix_mdev *m;
1110
1111        if (kvm->arch.crypto.crycbd) {
1112                list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1113                        if (m != matrix_mdev && m->kvm == kvm)
1114                                return -EPERM;
1115                }
1116
1117                kvm_get_kvm(kvm);
1118                matrix_mdev->kvm_busy = true;
1119                mutex_unlock(&matrix_dev->lock);
1120                kvm_arch_crypto_set_masks(kvm,
1121                                          matrix_mdev->matrix.apm,
1122                                          matrix_mdev->matrix.aqm,
1123                                          matrix_mdev->matrix.adm);
1124                mutex_lock(&matrix_dev->lock);
1125                kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1126                matrix_mdev->kvm = kvm;
1127                matrix_mdev->kvm_busy = false;
1128                wake_up_all(&matrix_mdev->wait_for_kvm);
1129        }
1130
1131        return 0;
1132}
1133
1134/*
1135 * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
1136 *
1137 * @nb: The notifier block
1138 * @action: Action to be taken
1139 * @data: data associated with the request
1140 *
1141 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1142 * pinned before). Other requests are ignored.
1143 *
1144 */
1145static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1146                                       unsigned long action, void *data)
1147{
1148        struct ap_matrix_mdev *matrix_mdev;
1149
1150        matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1151
1152        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1153                struct vfio_iommu_type1_dma_unmap *unmap = data;
1154                unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1155
1156                vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
1157                return NOTIFY_OK;
1158        }
1159
1160        return NOTIFY_DONE;
1161}
1162
1163/**
1164 * vfio_ap_mdev_unset_kvm
1165 *
1166 * @matrix_mdev: a matrix mediated device
1167 *
1168 * Performs clean-up of resources no longer needed by @matrix_mdev.
1169 *
1170 * Note: The matrix_dev->lock must be taken prior to calling
1171 * this function; however, the lock will be temporarily released while the
1172 * guest's AP configuration is cleared to avoid a potential lockdep splat.
1173 * The kvm->lock is taken to clear the guest's AP configuration which, under
1174 * certain circumstances, will result in a circular lock dependency if this is
1175 * done under the @matrix_mdev->lock.
1176 *
1177 */
1178static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
1179{
1180        /*
1181         * If the KVM pointer is in the process of being set, wait until the
1182         * process has completed.
1183         */
1184        wait_event_cmd(matrix_mdev->wait_for_kvm,
1185                       !matrix_mdev->kvm_busy,
1186                       mutex_unlock(&matrix_dev->lock),
1187                       mutex_lock(&matrix_dev->lock));
1188
1189        if (matrix_mdev->kvm) {
1190                matrix_mdev->kvm_busy = true;
1191                mutex_unlock(&matrix_dev->lock);
1192                kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
1193                mutex_lock(&matrix_dev->lock);
1194                vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
1195                matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
1196                kvm_put_kvm(matrix_mdev->kvm);
1197                matrix_mdev->kvm = NULL;
1198                matrix_mdev->kvm_busy = false;
1199                wake_up_all(&matrix_mdev->wait_for_kvm);
1200        }
1201}
1202
1203static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
1204                                       unsigned long action, void *data)
1205{
1206        int notify_rc = NOTIFY_OK;
1207        struct ap_matrix_mdev *matrix_mdev;
1208
1209        if (action != VFIO_GROUP_NOTIFY_SET_KVM)
1210                return NOTIFY_OK;
1211
1212        mutex_lock(&matrix_dev->lock);
1213        matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
1214
1215        if (!data)
1216                vfio_ap_mdev_unset_kvm(matrix_mdev);
1217        else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
1218                notify_rc = NOTIFY_DONE;
1219
1220        mutex_unlock(&matrix_dev->lock);
1221
1222        return notify_rc;
1223}
1224
1225static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1226{
1227        struct device *dev;
1228        struct vfio_ap_queue *q = NULL;
1229
1230        dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1231                                 &apqn, match_apqn);
1232        if (dev) {
1233                q = dev_get_drvdata(dev);
1234                put_device(dev);
1235        }
1236
1237        return q;
1238}
1239
1240int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
1241                             unsigned int retry)
1242{
1243        struct ap_queue_status status;
1244        int ret;
1245        int retry2 = 2;
1246
1247        if (!q)
1248                return 0;
1249
1250retry_zapq:
1251        status = ap_zapq(q->apqn);
1252        switch (status.response_code) {
1253        case AP_RESPONSE_NORMAL:
1254                ret = 0;
1255                break;
1256        case AP_RESPONSE_RESET_IN_PROGRESS:
1257                if (retry--) {
1258                        msleep(20);
1259                        goto retry_zapq;
1260                }
1261                ret = -EBUSY;
1262                break;
1263        case AP_RESPONSE_Q_NOT_AVAIL:
1264        case AP_RESPONSE_DECONFIGURED:
1265        case AP_RESPONSE_CHECKSTOPPED:
1266                WARN_ON_ONCE(status.irq_enabled);
1267                ret = -EBUSY;
1268                goto free_resources;
1269        default:
1270                /* things are really broken, give up */
1271                WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
1272                     status.response_code);
1273                return -EIO;
1274        }
1275
1276        /* wait for the reset to take effect */
1277        while (retry2--) {
1278                if (status.queue_empty && !status.irq_enabled)
1279                        break;
1280                msleep(20);
1281                status = ap_tapq(q->apqn, NULL);
1282        }
1283        WARN_ON_ONCE(retry2 <= 0);
1284
1285free_resources:
1286        vfio_ap_free_aqic_resources(q);
1287
1288        return ret;
1289}
1290
1291static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
1292{
1293        int ret;
1294        int rc = 0;
1295        unsigned long apid, apqi;
1296        struct vfio_ap_queue *q;
1297        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1298
1299        for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1300                             matrix_mdev->matrix.apm_max + 1) {
1301                for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1302                                     matrix_mdev->matrix.aqm_max + 1) {
1303                        q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1304                        ret = vfio_ap_mdev_reset_queue(q, 1);
1305                        /*
1306                         * Regardless whether a queue turns out to be busy, or
1307                         * is not operational, we need to continue resetting
1308                         * the remaining queues.
1309                         */
1310                        if (ret)
1311                                rc = ret;
1312                }
1313        }
1314
1315        return rc;
1316}
1317
1318static int vfio_ap_mdev_open(struct mdev_device *mdev)
1319{
1320        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1321        unsigned long events;
1322        int ret;
1323
1324
1325        if (!try_module_get(THIS_MODULE))
1326                return -ENODEV;
1327
1328        matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
1329        events = VFIO_GROUP_NOTIFY_SET_KVM;
1330
1331        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1332                                     &events, &matrix_mdev->group_notifier);
1333        if (ret) {
1334                module_put(THIS_MODULE);
1335                return ret;
1336        }
1337
1338        matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1339        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1340        ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
1341                                     &events, &matrix_mdev->iommu_notifier);
1342        if (!ret)
1343                return ret;
1344
1345        vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1346                                 &matrix_mdev->group_notifier);
1347        module_put(THIS_MODULE);
1348        return ret;
1349}
1350
1351static void vfio_ap_mdev_release(struct mdev_device *mdev)
1352{
1353        struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
1354
1355        mutex_lock(&matrix_dev->lock);
1356        vfio_ap_mdev_unset_kvm(matrix_mdev);
1357        mutex_unlock(&matrix_dev->lock);
1358
1359        vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
1360                                 &matrix_mdev->iommu_notifier);
1361        vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
1362                                 &matrix_mdev->group_notifier);
1363        module_put(THIS_MODULE);
1364}
1365
1366static int vfio_ap_mdev_get_device_info(unsigned long arg)
1367{
1368        unsigned long minsz;
1369        struct vfio_device_info info;
1370
1371        minsz = offsetofend(struct vfio_device_info, num_irqs);
1372
1373        if (copy_from_user(&info, (void __user *)arg, minsz))
1374                return -EFAULT;
1375
1376        if (info.argsz < minsz)
1377                return -EINVAL;
1378
1379        info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1380        info.num_regions = 0;
1381        info.num_irqs = 0;
1382
1383        return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1384}
1385
1386static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
1387                                    unsigned int cmd, unsigned long arg)
1388{
1389        int ret;
1390        struct ap_matrix_mdev *matrix_mdev;
1391
1392        mutex_lock(&matrix_dev->lock);
1393        switch (cmd) {
1394        case VFIO_DEVICE_GET_INFO:
1395                ret = vfio_ap_mdev_get_device_info(arg);
1396                break;
1397        case VFIO_DEVICE_RESET:
1398                matrix_mdev = mdev_get_drvdata(mdev);
1399                if (WARN(!matrix_mdev, "Driver data missing from mdev!!")) {
1400                        ret = -EINVAL;
1401                        break;
1402                }
1403
1404                /*
1405                 * If the KVM pointer is in the process of being set, wait until
1406                 * the process has completed.
1407                 */
1408                wait_event_cmd(matrix_mdev->wait_for_kvm,
1409                               !matrix_mdev->kvm_busy,
1410                               mutex_unlock(&matrix_dev->lock),
1411                               mutex_lock(&matrix_dev->lock));
1412
1413                ret = vfio_ap_mdev_reset_queues(mdev);
1414                break;
1415        default:
1416                ret = -EOPNOTSUPP;
1417                break;
1418        }
1419        mutex_unlock(&matrix_dev->lock);
1420
1421        return ret;
1422}
1423
1424static const struct mdev_parent_ops vfio_ap_matrix_ops = {
1425        .owner                  = THIS_MODULE,
1426        .supported_type_groups  = vfio_ap_mdev_type_groups,
1427        .mdev_attr_groups       = vfio_ap_mdev_attr_groups,
1428        .create                 = vfio_ap_mdev_create,
1429        .remove                 = vfio_ap_mdev_remove,
1430        .open                   = vfio_ap_mdev_open,
1431        .release                = vfio_ap_mdev_release,
1432        .ioctl                  = vfio_ap_mdev_ioctl,
1433};
1434
1435int vfio_ap_mdev_register(void)
1436{
1437        atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1438
1439        return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
1440}
1441
1442void vfio_ap_mdev_unregister(void)
1443{
1444        mdev_unregister_device(&matrix_dev->device);
1445}
1446