linux/drivers/xen/privcmd.c
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#define PRIV_VMA_LOCKED ((void *)1)

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

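/*
 * IOCTL_PRIVCMD_HYPERCALL: issue an arbitrary hypercall on behalf of a
 * sufficiently privileged userspace caller (typically the Xen toolstack).
 * The opcode and up to five arguments are passed straight through to
 * privcmd_call(); the ioctl's return value is the hypercall's return value.
 *
 * Rough sketch of a caller (illustrative only; it assumes the usual
 * privcmd_hypercall layout from xen/privcmd.h and the xen_version hypercall
 * constants from the Xen public headers):
 *
 *      struct privcmd_hypercall call = {
 *              .op  = __HYPERVISOR_xen_version,
 *              .arg = { XENVER_version, 0, 0, 0, 0 },
 *      };
 *      int fd = open("/dev/xen/privcmd", O_RDWR);
 *      long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 */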
static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}

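/*
 * Free every page gathered by gather_array() and reset the list head so the
 * caller can safely reuse (or simply drop) it.
 */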
static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        const void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

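/*
 * State shared across mmap_mfn_range() calls while IOCTL_PRIVCMD_MMAP walks
 * the gathered privcmd_mmap_entry array.  'va' tracks the next expected
 * virtual address, so successive chunks must be laid out contiguously
 * inside the target VMA.
 */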
struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain, NULL);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

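/*
 * IOCTL_PRIVCMD_MMAP: map ranges of foreign machine frames, described by an
 * array of privcmd_mmap_entry structures, into a VMA previously created by
 * mmap()ing the privcmd device.  The entries are first gathered into kernel
 * pages, the VMA is claimed with privcmd_enforce_singleshot_mapping(), and
 * each entry is then handed to mmap_mfn_range() with mmap_sem held for write.
 */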
static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        /* We only support privcmd_ioctl_mmap_batch for auto translated. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);

out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

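/*
 * State for IOCTL_PRIVCMD_MMAPBATCH{,_V2}.  The mapping is done in two
 * passes over the gathered pfn array: mmap_batch_fn() attempts every
 * mapping and records per-frame status in place, then, if anything went
 * wrong, mmap_return_errors() copies those statuses back to user space
 * (into the mfn array itself for V1, into the separate err array for V2).
 */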
struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int index;
        /* A tristate:
         *      0 for no errors
         *      1 if at least one error has happened (and no
         *          -ENOENT errors have happened)
         *      -ENOENT if at least 1 -ENOENT has happened.
         */
        int global_error;
        int version;

        /* User-space mfn array to store errors in the second pass for V1. */
        xen_pfn_t __user *user_mfn;
        /* User-space int array to store errors in the second pass for V2. */
        int __user *user_err;
};

/* auto translated dom0 note: if domU being created is PV, then mfn is
 * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
 */
static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;
        struct vm_area_struct *vma = st->vma;
        struct page **pages = vma->vm_private_data;
        struct page *cur_page = NULL;
        int ret;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                cur_page = pages[st->index++];

        ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                         st->vma->vm_page_prot, st->domain,
                                         &cur_page);

        /* Store error code for second pass. */
        if (st->version == 1) {
                if (ret < 0) {
                        /*
                         * V1 encodes the error codes in the 32bit top nibble of the
                         * mfn (with its known limitations vis-a-vis 64 bit callers).
                         */
                        *mfnp |= (ret == -ENOENT) ?
                                                PRIVCMD_MMAPBATCH_PAGED_ERROR :
                                                PRIVCMD_MMAPBATCH_MFN_ERROR;
                }
        } else { /* st->version == 2 */
                *((int *) mfnp) = ret;
        }

        /* And see if it affects the global_error. */
        if (ret < 0) {
                if (ret == -ENOENT)
                        st->global_error = -ENOENT;
                else {
                        /* Record that at least one error has happened. */
                        if (st->global_error == 0)
                                st->global_error = 1;
                }
        }
        st->va += PAGE_SIZE;

        return 0;
}

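/*
 * Second pass: copy the per-frame status recorded by mmap_batch_fn() back to
 * user space.  For V1 the mfn value with the error bits set is written back
 * into the caller's array; for V2 the raw error code goes to the caller's
 * err array.  __put_user() suffices because the destination was
 * access_ok()-checked in privcmd_ioctl_mmap_batch().
 */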
static int mmap_return_errors(void *data, void *state)
{
        struct mmap_batch_state *st = state;

        if (st->version == 1) {
                xen_pfn_t mfnp = *((xen_pfn_t *) data);
                if (mfnp & PRIVCMD_MMAPBATCH_MFN_ERROR)
                        return __put_user(mfnp, st->user_mfn++);
                else
                        st->user_mfn++;
        } else { /* st->version == 2 */
                int err = *((int *) data);
                if (err)
                        return __put_user(err, st->user_err++);
                else
                        st->user_err++;
        }

        return 0;
}

/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
 * the vma with the page info to use later.
 * Returns: 0 if success, otherwise -errno
 */
static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{
        int rc;
        struct page **pages;

        pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
        if (pages == NULL)
                return -ENOMEM;

        rc = alloc_xenballooned_pages(numpgs, pages, 0);
        if (rc != 0) {
                pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
                        numpgs, rc);
                kfree(pages);
                return -ENOMEM;
        }
        BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
        vma->vm_private_data = pages;

        return 0;
}

static struct vm_operations_struct privcmd_vm_ops;

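/*
 * IOCTL_PRIVCMD_MMAPBATCH (version 1) and IOCTL_PRIVCMD_MMAPBATCH_V2 both
 * land here.  A V1 request is copied into the common on-stack
 * privcmd_mmapbatch_v2 with m.err forced to NULL, so the rest of the
 * function only branches on 'version' when reporting per-frame errors.
 *
 * Rough sketch of the V2 flow from user space (illustrative only; it
 * assumes the struct layout from xen/privcmd.h and uses made-up variable
 * names):
 *
 *      fd   = open("/dev/xen/privcmd", O_RDWR);
 *      addr = mmap(NULL, nr * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *                  MAP_SHARED, fd, 0);
 *      struct privcmd_mmapbatch_v2 req = {
 *              .num  = nr,
 *              .dom  = domid,
 *              .addr = (unsigned long)addr,
 *              .arr  = frames,   (nr guest frame numbers to map)
 *              .err  = errs,     (nr ints receiving per-frame status)
 *      };
 *      rc = ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, &req);
 *
 * On -ENOENT at least one frame was paged out; errs[] (or the high bits of
 * the V1 mfn array) says which ones.
 */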
static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
{
        int ret;
        struct privcmd_mmapbatch_v2 m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        switch (version) {
        case 1:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
                        return -EFAULT;
                /* Returns per-frame error in m.arr. */
                m.err = NULL;
                if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
                        return -EFAULT;
                break;
        case 2:
                if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
                        return -EFAULT;
                /* Returns per-frame error code in m.err. */
                if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
                        return -EFAULT;
                break;
        default:
                return -EINVAL;
        }

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);

        if (ret)
                goto out;
        if (list_empty(&pagelist)) {
                ret = -EINVAL;
                goto out;
        }

        if (version == 2) {
                /* Zero error array now to only copy back actual errors. */
                if (clear_user(m.err, sizeof(int) * m.num)) {
                        ret = -EFAULT;
                        goto out;
                }
        }

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                ret = -EINVAL;
                goto out;
        }
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                ret = alloc_empty_pages(vma, m.num);
                if (ret < 0) {
                        up_write(&mm->mmap_sem);
                        goto out;
                }
        }

        state.domain        = m.dom;
        state.vma           = vma;
        state.va            = m.addr;
        state.index         = 0;
        state.global_error  = 0;
        state.version       = version;

        /* mmap_batch_fn guarantees ret == 0 */
        BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
                              &pagelist, mmap_batch_fn, &state));

        up_write(&mm->mmap_sem);

        if (state.global_error) {
                /* Write back errors in second pass. */
                state.user_mfn = (xen_pfn_t *)m.arr;
                state.user_err = m.err;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                                     &pagelist, mmap_return_errors, &state);
        } else
                ret = 0;

        /* If we have not had any EFAULT-like global errors then set the global
         * error to -ENOENT if necessary. */
        if ((ret == 0) && (state.global_error == -ENOENT))
                ret = -ENOENT;

out:
        free_page_list(&pagelist);

        return ret;
}

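/*
 * Top-level ioctl dispatcher for the privcmd device.  Anything other than
 * the hypercall and mmap commands handled below is rejected with -EINVAL.
 */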
static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata, 1);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH_V2:
                ret = privcmd_ioctl_mmap_batch(udata, 2);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

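/*
 * VMA close handler.  Only needed on auto-translated guests, where the
 * ballooned pages backing a foreign mapping (stashed in vm_private_data by
 * alloc_empty_pages()) must be unmapped and returned to the balloon.
 */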
static void privcmd_close(struct vm_area_struct *vma)
{
        struct page **pages = vma->vm_private_data;
        int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

        if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
                return;

        xen_unmap_domain_mfn_range(vma, numpgs, pages);
        free_xenballooned_pages(numpgs, pages);
        kfree(pages);
}

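/*
 * Faulting in a privcmd VMA is always an error: the only valid contents are
 * the mappings explicitly installed by the mmap/mmapbatch ioctls.
 */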
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .close = privcmd_close,
        .fault = privcmd_fault
};

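/*
 * mmap() handler for the device.  It only prepares the VMA (flags, vm_ops,
 * cleared private data); the actual foreign mappings are installed later by
 * the IOCTL_PRIVCMD_MMAP* ioctls against this VMA.
 */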
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
                         VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

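/*
 * Claim a VMA for mapping exactly once: atomically flip vm_private_data
 * from NULL to PRIV_VMA_LOCKED.  Returns non-zero on success, zero if the
 * VMA has already been claimed by an earlier mmap/mmapbatch call.
 */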
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}

const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

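/*
 * Register the misc device when running on Xen; the "xen/privcmd" name
 * makes it appear as /dev/xen/privcmd (with a dynamic minor).
 */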
static int __init privcmd_init(void)
{
        int err;

        if (!xen_domain())
                return -ENODEV;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                printk(KERN_ERR "Could not register Xen privcmd device\n");
                return err;
        }
        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);