linux/mm/mlock.c
/*
 *      linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/module.h>

/*
 * Return 1 if the caller is allowed to lock memory: it either has
 * CAP_IPC_LOCK or a non-zero RLIMIT_MEMLOCK soft limit.
 */
int can_do_mlock(void)
{
        if (capable(CAP_IPC_LOCK))
                return 1;
        if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
                return 1;
        return 0;
}
EXPORT_SYMBOL(can_do_mlock);

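/*
 * Example (illustrative only, not part of this file): an in-kernel
 * user that pins user pages for DMA and wants them charged like
 * mlock()ed memory might gate the operation on can_do_mlock(), e.g.
 *
 *        if (!can_do_mlock())
 *                return -EPERM;
 *
 * before bumping current->mm->locked_vm for the pinned range; the
 * helper is exported above so that modules can make the same check.
 */
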
/*
 * mlock_fixup - apply @newflags to the [start, end) part of @vma.
 *
 * Merges or splits the vma so that exactly the [start, end) range
 * carries the new flags, updates mm->locked_vm and, unless the vma is
 * VM_IO, faults the range in when it is being locked.  Called with
 * mmap_sem held for writing.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
        unsigned long start, unsigned long end, unsigned int newflags)
{
        struct mm_struct *mm = vma->vm_mm;
        pgoff_t pgoff;
        int pages;
        int ret = 0;

        if (newflags == vma->vm_flags) {
                *prev = vma;
                goto out;
        }

        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
                          vma->vm_file, pgoff, vma_policy(vma));
        if (*prev) {
                vma = *prev;
                goto success;
        }

        *prev = vma;

        if (start != vma->vm_start) {
                ret = split_vma(mm, vma, start, 1);
                if (ret)
                        goto out;
        }

        if (end != vma->vm_end) {
                ret = split_vma(mm, vma, end, 0);
                if (ret)
                        goto out;
        }

success:
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED; make_pages_present below will bring it back.
         */
        vma->vm_flags = newflags;

        /*
         * Keep track of the amount of locked VM.
         */
        pages = (end - start) >> PAGE_SHIFT;
        if (newflags & VM_LOCKED) {
                pages = -pages;
                if (!(newflags & VM_IO))
                        ret = make_pages_present(start, end);
        }

        mm->locked_vm -= pages;
out:
        return ret;
}

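/*
 * Worked example for mlock_fixup() (addresses are made up): with 4K
 * pages, locking [0xb7f02000, 0xb7f04000) in the middle of a vma
 * spanning [0xb7f00000, 0xb7f06000), and assuming vma_merge() finds
 * no mergeable neighbour, the code above splits once at 0xb7f02000
 * (start != vma->vm_start) and once at 0xb7f04000 (end != vma->vm_end).
 * Of the three resulting vmas only the middle one gets VM_LOCKED,
 * (end - start) >> PAGE_SHIFT = 2 pages are added to mm->locked_vm,
 * and make_pages_present() faults them in.
 */
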
/*
 * Lock or unlock (according to @on) every page in [start, start + len).
 * Callers pass in a page-aligned @start; @len is page-aligned here.
 * The whole range must be covered by existing vmas.  Called with
 * mmap_sem held for writing.
 */
static int do_mlock(unsigned long start, size_t len, int on)
{
        unsigned long nstart, end, tmp;
        struct vm_area_struct *vma, *prev;
        int error;

        len = PAGE_ALIGN(len);
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (end == start)
                return 0;
        vma = find_vma_prev(current->mm, start, &prev);
        if (!vma || vma->vm_start > start)
                return -ENOMEM;

        if (start > vma->vm_start)
                prev = vma;

        for (nstart = start ; ; ) {
                unsigned int newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vma->vm_flags | VM_LOCKED;
                if (!on)
                        newflags &= ~VM_LOCKED;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        break;
                nstart = tmp;
                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        break;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        break;
                }
        }
        return error;
}

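/*
 * Note on the loop above (illustrative): the locked range may span
 * several vmas; mlock()ing [A, C) where one vma covers [A, B) and the
 * next starts exactly at B calls mlock_fixup() once per vma.  If there
 * is an unmapped hole anywhere in the range, the next vma does not
 * start at nstart and the loop stops with -ENOMEM, so locking a range
 * that is not fully mapped fails.
 */
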
SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
        unsigned long locked;
        unsigned long lock_limit;
        int error = -ENOMEM;

        if (!can_do_mlock())
                return -EPERM;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;

        locked = len >> PAGE_SHIFT;
        locked += current->mm->locked_vm;

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        /* check against resource limits */
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                error = do_mlock(start, len, 1);
        up_write(&current->mm->mmap_sem);
        return error;
}

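/*
 * Example (user-space sketch, not part of this file): mlock() operates
 * on whole pages, so the rounding above turns, with 4K pages, a request
 * for 100 bytes at 0x804a123 into start = 0x804a000 and
 * len = PAGE_ALIGN(100 + 0x123) = 0x1000, i.e. one page.  A typical
 * caller locks a buffer it cannot afford to have paged out:
 *
 *        #include <stdlib.h>
 *        #include <string.h>
 *        #include <sys/mman.h>
 *
 *        int main(void)
 *        {
 *                size_t len = 4096;
 *                char *buf = malloc(len);
 *
 *                if (!buf || mlock(buf, len) != 0)
 *                        return 1;
 *                memset(buf, 0, len);
 *                munlock(buf, len);
 *                free(buf);
 *                return 0;
 *        }
 */
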
SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
        ret = do_mlock(start, len, 0);
        up_write(&current->mm->mmap_sem);
        return ret;
}

/*
 * Apply MCL_CURRENT and/or MCL_FUTURE to the whole address space:
 * MCL_FUTURE is recorded in mm->def_flags so that future mappings are
 * created VM_LOCKED; MCL_CURRENT walks every existing vma.  Passing
 * flags == 0 (munlockall) clears both.
 */
static int do_mlockall(int flags)
{
        struct vm_area_struct *vma, *prev = NULL;
        unsigned int def_flags = 0;

        if (flags & MCL_FUTURE)
                def_flags = VM_LOCKED;
        current->mm->def_flags = def_flags;
        if (flags == MCL_FUTURE)
                goto out;

        for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
                unsigned int newflags;

                newflags = vma->vm_flags | VM_LOCKED;
                if (!(flags & MCL_CURRENT))
                        newflags &= ~VM_LOCKED;

                /* Ignore errors */
                mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
        }
out:
        return 0;
}

SYSCALL_DEFINE1(mlockall, int, flags)
{
        unsigned long lock_limit;
        int ret = -EINVAL;

        if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
                goto out;

        ret = -EPERM;
        if (!can_do_mlock())
                goto out;

        down_write(&current->mm->mmap_sem);

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        ret = -ENOMEM;
        if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
            capable(CAP_IPC_LOCK))
                ret = do_mlockall(flags);
        up_write(&current->mm->mmap_sem);
out:
        return ret;
}

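/*
 * Example (user-space sketch, not part of this file): a latency-sensitive
 * process typically locks both its current and its future mappings; any
 * mmap() issued while the locks are in force is created VM_LOCKED
 * because MCL_FUTURE was stored in mm->def_flags by do_mlockall() above:
 *
 *        #include <sys/mman.h>
 *
 *        int main(void)
 *        {
 *                if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
 *                        return 1;
 *                munlockall();
 *                return 0;
 *        }
 */
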
SYSCALL_DEFINE0(munlockall)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        ret = do_mlockall(0);
        up_write(&current->mm->mmap_sem);
        return ret;
}

/*
 * Objects with a lifetime different from that of a process (SHM_LOCK
 * and SHM_HUGETLB shm segments) get accounted against the user_struct
 * instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

int user_shm_lock(size_t size, struct user_struct *user)
{
        unsigned long lock_limit, locked;
        int allowed = 0;

        locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        if (lock_limit == RLIM_INFINITY)
                allowed = 1;
        lock_limit >>= PAGE_SHIFT;
        spin_lock(&shmlock_user_lock);
        if (!allowed &&
            locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
                goto out;
        get_uid(user);
        user->locked_shm += locked;
        allowed = 1;
out:
        spin_unlock(&shmlock_user_lock);
        return allowed;
}

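/*
 * Example (user-space sketch, not part of this file): SysV shared
 * memory locking goes through user_shm_lock() above rather than
 * mlock(), so the pages are charged against the user_struct
 * (user->locked_shm) instead of any single mm's locked_vm:
 *
 *        #include <sys/ipc.h>
 *        #include <sys/shm.h>
 *
 *        int main(void)
 *        {
 *                int id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | 0600);
 *
 *                if (id < 0 || shmctl(id, SHM_LOCK, NULL) != 0)
 *                        return 1;
 *                shmctl(id, SHM_UNLOCK, NULL);
 *                shmctl(id, IPC_RMID, NULL);
 *                return 0;
 *        }
 */
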
void user_shm_unlock(size_t size, struct user_struct *user)
{
        spin_lock(&shmlock_user_lock);
        user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        spin_unlock(&shmlock_user_lock);
        free_uid(user);
}