linux/drivers/vfio/vfio_iommu_spapr_tce.c
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for the SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'able space using the IOMMU.
 */

/*
 * The container descriptor supports only a single group per container.
 * This is required by the API as the container is not supplied with the
 * IOMMU group at the moment of initialization.
 */
struct tce_container {
        struct mutex lock;
        struct iommu_table *tbl;
        bool enabled;
};

static int tce_iommu_enable(struct tce_container *container)
{
        int ret = 0;
        unsigned long locked, lock_limit, npages;
        struct iommu_table *tbl = container->tbl;

        if (!container->tbl)
                return -ENXIO;

        if (!current->mm)
                return -ESRCH; /* process exited */

        if (container->enabled)
                return -EBUSY;

        /*
         * When userspace pages are mapped into the IOMMU, they are effectively
         * locked memory, so, in theory, we need to update the accounting of
         * locked pages on each map and unmap.  For powerpc, the map/unmap
         * paths can be very hot, though, and the accounting would kill
         * performance, especially since it would be difficult or impossible
         * to handle the accounting in real mode only.
         *
         * To address that, rather than precisely accounting every page, we
         * instead account for a worst case of locked memory when the IOMMU is
         * enabled and disabled.  The worst-case upper bound on locked memory
         * is the size of the whole IOMMU window, which is usually relatively
         * small (compared to total memory sizes) on POWER hardware.  A worked
         * example of this calculation follows the function below.
         *
         * Besides, we don't have a nice way to fail an H_PUT_TCE due to
         * ulimits; that would effectively kill the guest at random points,
         * so it is much better to enforce the limit based on the maximum
         * that the guest can map.
         */
        down_write(&current->mm->mmap_sem);
        npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT;
        locked = current->mm->locked_vm + npages;
        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
                pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
                                rlimit(RLIMIT_MEMLOCK));
                ret = -ENOMEM;
        } else {
                current->mm->locked_vm += npages;
                container->enabled = true;
        }
        up_write(&current->mm->mmap_sem);

        return ret;
}
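
/*
 * Worked example of the worst-case accounting above, assuming a 1GB DMA32
 * window backed by 4K IOMMU pages on a host running with 64K system pages
 * (the window size is illustrative, not taken from real hardware):
 *
 *     it_size = 1GB / 4K                        = 0x40000 TCE entries
 *     window  = it_size << IOMMU_PAGE_SHIFT(12) = 1GB
 *     npages  = window >> PAGE_SHIFT(16)        = 16384 system pages
 *
 * So enabling the container charges 16384 pages (the whole 1GB window)
 * against RLIMIT_MEMLOCK, regardless of how much the guest actually maps.
 */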

static void tce_iommu_disable(struct tce_container *container)
{
        if (!container->enabled)
                return;

        container->enabled = false;

        if (!container->tbl || !current->mm)
                return;

        down_write(&current->mm->mmap_sem);
        current->mm->locked_vm -= (container->tbl->it_size <<
                        IOMMU_PAGE_SHIFT) >> PAGE_SHIFT;
        up_write(&current->mm->mmap_sem);
}

static void *tce_iommu_open(unsigned long arg)
{
        struct tce_container *container;

        if (arg != VFIO_SPAPR_TCE_IOMMU) {
                pr_err("tce_vfio: Wrong IOMMU type\n");
                return ERR_PTR(-EINVAL);
        }

        container = kzalloc(sizeof(*container), GFP_KERNEL);
        if (!container)
                return ERR_PTR(-ENOMEM);

        mutex_init(&container->lock);

        return container;
}

static void tce_iommu_release(void *iommu_data)
{
        struct tce_container *container = iommu_data;

        WARN_ON(container->tbl && !container->tbl->it_group);
        tce_iommu_disable(container);

        if (container->tbl && container->tbl->it_group)
                tce_iommu_detach_group(iommu_data, container->tbl->it_group);

        mutex_destroy(&container->lock);

        kfree(container);
}

static long tce_iommu_ioctl(void *iommu_data,
                                 unsigned int cmd, unsigned long arg)
{
        struct tce_container *container = iommu_data;
        unsigned long minsz;
        long ret;

        switch (cmd) {
        case VFIO_CHECK_EXTENSION:
                return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;

        case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
                struct vfio_iommu_spapr_tce_info info;
                struct iommu_table *tbl = container->tbl;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
                                dma32_window_size);

                if (copy_from_user(&info, (void __user *)arg, minsz))
                        return -EFAULT;

                if (info.argsz < minsz)
                        return -EINVAL;

                info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT;
                info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT;
                info.flags = 0;

                if (copy_to_user((void __user *)arg, &info, minsz))
                        return -EFAULT;

                return 0;
        }
        case VFIO_IOMMU_MAP_DMA: {
                struct vfio_iommu_type1_dma_map param;
                struct iommu_table *tbl = container->tbl;
                unsigned long tce, i;

                if (!tbl)
                        return -ENXIO;

                BUG_ON(!tbl->it_group);

                minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
                                VFIO_DMA_MAP_FLAG_WRITE))
                        return -EINVAL;

                if ((param.size & ~IOMMU_PAGE_MASK) ||
                                (param.vaddr & ~IOMMU_PAGE_MASK))
                        return -EINVAL;

                /* iova is checked by the IOMMU API */
                tce = param.vaddr;
                if (param.flags & VFIO_DMA_MAP_FLAG_READ)
                        tce |= TCE_PCI_READ;
                if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
                        tce |= TCE_PCI_WRITE;

                ret = iommu_tce_put_param_check(tbl, param.iova, tce);
                if (ret)
                        return ret;

                for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) {
                        ret = iommu_put_tce_user_mode(tbl,
                                        (param.iova >> IOMMU_PAGE_SHIFT) + i,
                                        tce);
                        if (ret)
                                break;
                        tce += IOMMU_PAGE_SIZE;
                }
                if (ret)
                        iommu_clear_tces_and_put_pages(tbl,
                                        param.iova >> IOMMU_PAGE_SHIFT, i);

                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_UNMAP_DMA: {
                struct vfio_iommu_type1_dma_unmap param;
                struct iommu_table *tbl = container->tbl;

                if (WARN_ON(!tbl))
                        return -ENXIO;

                minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
                                size);

                if (copy_from_user(&param, (void __user *)arg, minsz))
                        return -EFAULT;

                if (param.argsz < minsz)
                        return -EINVAL;

                /* No flags are supported for now */
                if (param.flags)
                        return -EINVAL;

                if (param.size & ~IOMMU_PAGE_MASK)
                        return -EINVAL;

                ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
                                param.size >> IOMMU_PAGE_SHIFT);
                if (ret)
                        return ret;

                ret = iommu_clear_tces_and_put_pages(tbl,
                                param.iova >> IOMMU_PAGE_SHIFT,
                                param.size >> IOMMU_PAGE_SHIFT);
                iommu_flush_tce(tbl);

                return ret;
        }
        case VFIO_IOMMU_ENABLE:
                mutex_lock(&container->lock);
                ret = tce_iommu_enable(container);
                mutex_unlock(&container->lock);
                return ret;

        case VFIO_IOMMU_DISABLE:
                mutex_lock(&container->lock);
                tce_iommu_disable(container);
                mutex_unlock(&container->lock);
                return 0;
        }

        return -ENOTTY;
}
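
/*
 * Rough userspace sketch of how the ioctls above are expected to be driven
 * once a group is attached to the container; "container_fd", "mem" and
 * "size" are assumed for illustration:
 *
 *     struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
 *     struct vfio_iommu_type1_dma_map map = { .argsz = sizeof(map) };
 *
 *     ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *     ioctl(container_fd, VFIO_IOMMU_ENABLE);    // accounts the DMA window
 *
 *     map.vaddr = (__u64)(unsigned long)mem;     // IOMMU page aligned
 *     map.iova  = info.dma32_window_start;
 *     map.size  = size;                          // IOMMU page aligned
 *     map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 *     ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 *
 * VFIO_IOMMU_UNMAP_DMA takes a struct vfio_iommu_type1_dma_unmap with the
 * same iova/size to undo the mapping, and VFIO_IOMMU_DISABLE drops the
 * locked-memory accounting again.
 */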

static int tce_iommu_attach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        int ret;
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);

        /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
                        iommu_group_id(iommu_group), iommu_group); */
        if (container->tbl) {
                pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
                                iommu_group_id(container->tbl->it_group),
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
        } else if (container->enabled) {
                pr_err("tce_vfio: attaching group #%u to enabled container\n",
                                iommu_group_id(iommu_group));
                ret = -EBUSY;
        } else {
                ret = iommu_take_ownership(tbl);
                if (!ret)
                        container->tbl = tbl;
        }

        mutex_unlock(&container->lock);

        return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
                struct iommu_group *iommu_group)
{
        struct tce_container *container = iommu_data;
        struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

        BUG_ON(!tbl);
        mutex_lock(&container->lock);
        if (tbl != container->tbl) {
                pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
                                iommu_group_id(iommu_group),
                                iommu_group_id(tbl->it_group));
        } else {
                if (container->enabled) {
                        pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
                                        iommu_group_id(tbl->it_group));
                        tce_iommu_disable(container);
                }

                /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
                                iommu_group_id(iommu_group), iommu_group); */
                container->tbl = NULL;
                iommu_release_ownership(tbl);
        }
        mutex_unlock(&container->lock);
}
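
/*
 * Rough sketch of the userspace sequence that ends up in
 * tce_iommu_attach_group(); the group number "N" is assumed for
 * illustration:
 *
 *     int container_fd = open("/dev/vfio/vfio", O_RDWR);
 *     int group_fd = open("/dev/vfio/N", O_RDWR);
 *
 *     // bind the group to this container
 *     ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd);
 *     // select this driver; VFIO then attaches the bound group(s)
 *     ioctl(container_fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *
 * Closing the group fd (or the container fd) eventually leads to
 * tce_iommu_detach_group() and tce_iommu_release().
 */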

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
        .name           = "iommu-vfio-powerpc",
        .owner          = THIS_MODULE,
        .open           = tce_iommu_open,
        .release        = tce_iommu_release,
        .ioctl          = tce_iommu_ioctl,
        .attach_group   = tce_iommu_attach_group,
        .detach_group   = tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
        return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
        vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);