linux/drivers/xen/privcmd.c
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */
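
/*
 * This file backs the "xen/privcmd" misc character device (typically
 * visible as /dev/xen/privcmd), through which privileged user space such
 * as the domain-0 toolstack issues raw hypercalls and maps foreign guest
 * frames into its own address space via the ioctl() and mmap() entry
 * points defined below.
 */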

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/miscdevice.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

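/*
 * IOCTL_PRIVCMD_HYPERCALL: forward a raw hypercall from user space.  The
 * hypercall number and up to five arguments arrive in a copied-in
 * struct privcmd_hypercall; privcmd_call() issues the trap and the
 * hypervisor's return value becomes the ioctl() return value.
 *
 * Roughly how a user-space caller might drive this (an illustrative
 * sketch only; it assumes the device node is /dev/xen/privcmd and that
 * the privcmd and Xen interface headers are available):
 *
 *        struct privcmd_hypercall hc = {
 *                .op  = __HYPERVISOR_xen_version,
 *                .arg = { XENVER_version, 0 },
 *        };
 *        int fd = open("/dev/xen/privcmd", O_RDWR);
 *        long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &hc);
 */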
static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}

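/* Free every page gathered by gather_array() and reinitialise the list head. */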
static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

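/* Per-call state passed through traverse_pages() by IOCTL_PRIVCMD_MMAP. */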
struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

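/*
 * IOCTL_PRIVCMD_MMAP: map the machine-frame ranges described by an array
 * of struct privcmd_mmap_entry into a VMA the caller has already created
 * by mmap()ing this device.  The entries are copied in with gather_array()
 * and then replayed by mmap_mfn_range(), which installs each range with
 * xen_remap_domain_mfn_range().
 */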
static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);


out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}

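/* Per-call state passed through traverse_pages() by IOCTL_PRIVCMD_MMAPBATCH. */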
struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int err;

        xen_pfn_t __user *user;
};

static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                       st->vma->vm_page_prot, st->domain) < 0) {
                *mfnp |= 0xf0000000U;
                st->err++;
        }
        st->va += PAGE_SIZE;

        return 0;
}

static int mmap_return_errors(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        return put_user(*mfnp, st->user++);
}

static struct vm_operations_struct privcmd_vm_ops;

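/*
 * IOCTL_PRIVCMD_MMAPBATCH: map an array of individual frames of domain
 * m.dom at consecutive virtual addresses starting at m.addr, which must
 * exactly cover a VMA previously obtained by mmap()ing this device.
 * Frames that fail to map are reported back in place in the user array
 * with 0xf0000000 or'd into the frame number (see mmap_batch_fn()).
 *
 * A rough user-space sketch (illustrative only, not taken from a real
 * toolstack):
 *
 *        void *addr = mmap(NULL, npages * PAGE_SIZE, PROT_READ | PROT_WRITE,
 *                          MAP_SHARED, fd, 0);
 *        struct privcmd_mmapbatch batch = {
 *                .num  = npages,
 *                .dom  = domid,
 *                .addr = (unsigned long)addr,
 *                .arr  = mfn_array,
 *        };
 *        ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &batch);
 */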
static long privcmd_ioctl_mmap_batch(void __user *udata)
{
        int ret;
        struct privcmd_mmapbatch m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&m, udata, sizeof(m)))
                return -EFAULT;

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
                           m.arr);

        if (ret || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        ret = -EINVAL;
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                goto out;
        }

        state.domain = m.dom;
        state.vma = vma;
        state.va = m.addr;
        state.err = 0;

        ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                             &pagelist, mmap_batch_fn, &state);

        up_write(&mm->mmap_sem);

        if (state.err > 0) {
                state.user = m.arr;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                               &pagelist,
                               mmap_return_errors, &state);
        }

out:
        free_page_list(&pagelist);

        return ret;
}

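/* Top-level ioctl dispatcher for the privcmd character device. */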
static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

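/*
 * A privcmd VMA is only ever populated by the ioctls above, so a fault
 * means user space touched an address that was never successfully
 * mapped; report SIGBUS instead of trying to fill the page in.
 */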
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .fault = privcmd_fault
};

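/*
 * mmap() on the device only reserves the VMA and tags it with the flags
 * and vm_ops below; the actual PFN mappings are installed later by the
 * mmap ioctls.
 */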
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Unsupported for auto-translate guests. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

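/*
 * Each privcmd VMA may be populated only once.  vm_private_data starts
 * out NULL (set in privcmd_mmap()) and is claimed atomically here with
 * xchg(), so a second attempt to map into the same VMA makes the callers
 * above fail with -EINVAL.
 */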
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}

const struct file_operations xen_privcmd_fops = {
        .owner = THIS_MODULE,
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};
EXPORT_SYMBOL_GPL(xen_privcmd_fops);

static struct miscdevice privcmd_dev = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = "xen/privcmd",
        .fops = &xen_privcmd_fops,
};

static int __init privcmd_init(void)
{
        int err;

        if (!xen_domain())
                return -ENODEV;

        err = misc_register(&privcmd_dev);
        if (err != 0) {
                printk(KERN_ERR "Could not register Xen privcmd device\n");
                return err;
        }
        return 0;
}

static void __exit privcmd_exit(void)
{
        misc_deregister(&privcmd_dev);
}

module_init(privcmd_init);
module_exit(privcmd_exit);