/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */
#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>

#include "internal.h"

int can_do_mlock(void)
{
	if (capable(CAP_IPC_LOCK))
		return 1;
	if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
		return 1;
	return 0;
}
EXPORT_SYMBOL(can_do_mlock);

#ifdef CONFIG_UNEVICTABLE_LRU
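/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 *
 * When lazy mlocking via vmscan, it is important to ensure that the
 * vma's VM_LOCKED status is not concurrently being modified, otherwise we
 * may have mlocked a page that is being munlocked.  So lazy mlock must take
 * the mmap_sem for read, and verify that the vma really is locked
 * (see mm/rmap.c).
 */

/*
 * LRU accounting for clear_page_mlock()
 */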
void __clear_page_mlock(struct page *page)
{
	VM_BUG_ON(!PageLocked(page));

	if (!page->mapping) {	/* truncated ? */
		return;
	}

	dec_zone_page_state(page, NR_MLOCK);
	count_vm_event(UNEVICTABLE_PGCLEARED);
	if (!isolate_lru_page(page)) {
		putback_lru_page(page);
	} else {
		/*
		 * We lost the isolation race: the page already moved
		 * back to the LRU.
		 */
		if (PageUnevictable(page))
			count_vm_event(UNEVICTABLE_PGSTRANDED);
	}
}
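
/*
 * Mark page as mlocked if not already.
 * If page on LRU, isolate and putback to move to unevictable list.
 */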
void mlock_vma_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (!TestSetPageMlocked(page)) {
		inc_zone_page_state(page, NR_MLOCK);
		count_vm_event(UNEVICTABLE_PGMLOCKED);
		if (!isolate_lru_page(page))
			putback_lru_page(page);
	}
}
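
/*
 * munlock_vma_page() - munlock a vma page
 *
 * Called from munlock()/munmap() path with page supposedly on the LRU.
 *
 * Note: unlike mlock_vma_page(), it is not enough to just clear
 * PageMlocked and then attempt to isolate the page.  We must isolate the
 * page so that no one else can mess with its unevictable/mlocked state
 * while we munlock it.  We pre-clear PageMlocked anyway, because we might
 * lose the isolation race and not get another chance to clear it.  If the
 * page is still mapped by other VM_LOCKED vmas, try_to_munlock() -- or a
 * later try_to_unmap() from vmscan -- will restore the PageMlocked state
 * via mlock_vma_page() above.
 */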
static void munlock_vma_page(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (TestClearPageMlocked(page)) {
		dec_zone_page_state(page, NR_MLOCK);
		if (!isolate_lru_page(page)) {
			int ret = try_to_munlock(page);
			/*
			 * did try_to_munlock() succeed or punt?
			 */
			if (ret == SWAP_SUCCESS || ret == SWAP_AGAIN)
				count_vm_event(UNEVICTABLE_PGMUNLOCKED);

			putback_lru_page(page);
		} else {
			/*
			 * We lost the isolation race: let try_to_unmap()
			 * deal with it.  At least we get the page state and
			 * mlock stats right.  However, the page is stranded
			 * on the unevictable list; that will be fixed up
			 * when the page is eventually freed or the
			 * unevictable list is scanned.
			 */
			if (PageUnevictable(page))
				count_vm_event(UNEVICTABLE_PGSTRANDED);
			else
				count_vm_event(UNEVICTABLE_PGMUNLOCKED);
		}
	}
}
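
/**
 * __mlock_vma_pages_range() - mlock/munlock a range of pages in the vma.
 * @vma:   target vma
 * @start: start address
 * @end:   end address
 * @mlock: 0 indicates munlock, otherwise mlock.
 *
 * If @mlock == 0, unlock an mlocked range;
 * else mlock the range of pages.  This takes care of making the pages
 * present, too.
 *
 * return 0 on success, negative error code on error.
 *
 * vma->vm_mm->mmap_sem must be held for at least read.
 */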
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end,
				   int mlock)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr = start;
	struct page *pages[16]; /* 16 gives a reasonable batch */
	int nr_pages = (end - start) / PAGE_SIZE;
	int ret = 0;
	int gup_flags = 0;

	VM_BUG_ON(start & ~PAGE_MASK);
	VM_BUG_ON(end & ~PAGE_MASK);
	VM_BUG_ON(start < vma->vm_start);
	VM_BUG_ON(end > vma->vm_end);
	VM_BUG_ON((!rwsem_is_locked(&mm->mmap_sem)) &&
		  (atomic_read(&mm->mm_users) != 0));

	/*
	 * mlock:   don't page populate if vma has PROT_NONE permission.
	 * munlock: always do munlock although the vma has PROT_NONE
	 *          permission, or SIGKILL is pending.
	 */
	if (!mlock)
		gup_flags |= GUP_FLAGS_IGNORE_VMA_PERMISSIONS |
			     GUP_FLAGS_IGNORE_SIGKILL;

	if (vma->vm_flags & VM_WRITE)
		gup_flags |= GUP_FLAGS_WRITE;

	while (nr_pages > 0) {
		int i;

		cond_resched();

		/*
		 * get_user_pages makes pages present if we are
		 * setting mlock, and this extra reference count will
		 * disable migration of this page.  However, page may
		 * still be truncated out from under us.
		 */
		ret = __get_user_pages(current, mm, addr,
				min_t(int, nr_pages, ARRAY_SIZE(pages)),
				gup_flags, pages, NULL);
		/*
		 * This can happen for, e.g., VM_NONLINEAR regions before
		 * a page has been allocated and mapped at a given offset,
		 * or for addresses that map beyond end of a file.
		 * We'll mlock the pages if/when they get faulted in.
		 */
		if (ret < 0)
			break;
		if (ret == 0) {
			/*
			 * We know the vma is there, so the only time
			 * we cannot get a single page should be an
			 * error (ret < 0) case.
			 */
			WARN_ON(1);
			break;
		}

		lru_add_drain();	/* push cached pages to LRU */

		for (i = 0; i < ret; i++) {
			struct page *page = pages[i];

			lock_page(page);
			/*
			 * Because we lock page here and migration is blocked
			 * by the elevated reference, we need only check for
			 * page truncation (file-cache only).
			 */
			if (page->mapping) {
				if (mlock)
					mlock_vma_page(page);
				else
					munlock_vma_page(page);
			}
			unlock_page(page);
			put_page(page);		/* ref from get_user_pages() */

			/*
			 * here we assume that get_user_pages() has given us
			 * a list of virtually contiguous pages.
			 */
			addr += PAGE_SIZE;	/* for next get_user_pages() */
			nr_pages--;
		}
		ret = 0;
	}

	return ret;	/* count entire vma as locked_vm */
}
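
/*
 * convert get_user_pages() return value to posix mlock() error
 */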
static int __mlock_posix_error_return(long retval)
{
	if (retval == -EFAULT)
		retval = -ENOMEM;
	else if (retval == -ENOMEM)
		retval = -EAGAIN;
	return retval;
}

#else /* CONFIG_UNEVICTABLE_LRU */
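
/*
 * Just make pages present if VM_LOCKED.  No-op if unlocking.
 */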
static long __mlock_vma_pages_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end,
				   int mlock)
{
	if (mlock && (vma->vm_flags & VM_LOCKED))
		return make_pages_present(start, end);
	return 0;
}

static inline int __mlock_posix_error_return(long retval)
{
	return 0;
}

#endif /* CONFIG_UNEVICTABLE_LRU */
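
/**
 * mlock_vma_pages_range() - mlock pages in specified vma range.
 * @vma - the vma containing the specified address range
 * @start - starting address in @vma to mlock
 * @end   - end address [+1] in @vma to mlock
 *
 * For mmap()/mremap()/expansion of mlocked vma.
 *
 * return 0 on success for "normal" vmas.
 *
 * return number of pages [> 0] to be removed from locked_vm on success
 * of "special" vmas.
 */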
long mlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end)
{
	int nr_pages = (end - start) / PAGE_SIZE;
	BUG_ON(!(vma->vm_flags & VM_LOCKED));

	/*
	 * filter unlockable vmas
	 */
	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
		goto no_mlock;

	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current))) {

		__mlock_vma_pages_range(vma, start, end, 1);

		/* Hide errors from mmap() and other callers */
		return 0;
	}

	/*
	 * User mapped kernel pages or huge pages:
	 * make these pages present to populate the ptes, but
	 * fall thru' to reset VM_LOCKED--no need to unlock, and
	 * return nr_pages so these don't get counted against task's
	 * locked limit.  huge pages are already counted against
	 * locked vm limit.
	 */
	make_pages_present(start, end);

no_mlock:
	vma->vm_flags &= ~VM_LOCKED;	/* and don't come back! */
	return nr_pages;		/* error or pages NOT mlocked */
}
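
/*
 * munlock_vma_pages_range() - munlock all pages in the vma range.
 * @vma - vma containing range to be munlock()ed.
 * @start - start address in @vma of the range
 * @end - end of range in @vma.
 *
 * For mremap(), munmap() and exit().
 *
 * Called with @vma VM_LOCKED.
 *
 * Returns with VM_LOCKED cleared.  Callers must be prepared to
 * deal with this.
 *
 * We don't save and restore VM_LOCKED here because pages are
 * still on lru.  In unmap path, pages might be scanned by reclaim
 * and re-mlocked by try_to_{munlock|unmap} before we unmap and
 * free them.  This will result in freeing mlocked pages.
 */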
void munlock_vma_pages_range(struct vm_area_struct *vma,
			   unsigned long start, unsigned long end)
{
	vma->vm_flags &= ~VM_LOCKED;
	__mlock_vma_pages_range(vma, start, end, 0);
}
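
/*
 * mlock_fixup - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes via make_pages_present().
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */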
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, unsigned int newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	int lock = newflags & VM_LOCKED;

	if (newflags == vma->vm_flags ||
			(vma->vm_flags & (VM_IO | VM_PFNMAP)))
		goto out;	/* don't set VM_LOCKED, don't count */

	if ((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
			is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current)) {
		if (lock)
			make_pages_present(start, end);
		goto out;	/* don't set VM_LOCKED, don't count */
	}

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!lock)
		nr_pages = -nr_pages;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, __mlock_vma_pages_range will bring it back.
	 */
	vma->vm_flags = newflags;

	if (lock) {
		ret = __mlock_vma_pages_range(vma, start, end, 1);

		if (ret > 0) {
			mm->locked_vm -= ret;
			ret = 0;
		} else
			ret = __mlock_posix_error_return(ret);
	} else {
		__mlock_vma_pages_range(vma, start, end, 0);
	}

out:
	*prev = vma;
	return ret;
}

static int do_mlock(unsigned long start, size_t len, int on)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct * vma, * prev;
	int error;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma_prev(current->mm, start, &prev);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		unsigned int newflags;

		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */

		newflags = vma->vm_flags | VM_LOCKED;
		if (!on)
			newflags &= ~VM_LOCKED;

		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	if (!can_do_mlock())
		return -EPERM;

	lru_add_drain_all();	/* flush pagevec */

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;

	locked = len >> PAGE_SHIFT;
	locked += current->mm->locked_vm;

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = do_mlock(start, len, 1);
	up_write(&current->mm->mmap_sem);
	return error;
}

SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;
	ret = do_mlock(start, len, 0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

static int do_mlockall(int flags)
{
	struct vm_area_struct * vma, * prev = NULL;
	unsigned int def_flags = 0;

	if (flags & MCL_FUTURE)
		def_flags = VM_LOCKED;
	current->mm->def_flags = def_flags;
	if (flags == MCL_FUTURE)
		goto out;

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		unsigned int newflags;

		newflags = vma->vm_flags | VM_LOCKED;
		if (!(flags & MCL_CURRENT))
			newflags &= ~VM_LOCKED;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
	}
out:
	return 0;
}

SYSCALL_DEFINE1(mlockall, int, flags)
{
	unsigned long lock_limit;
	int ret = -EINVAL;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
		goto out;

	ret = -EPERM;
	if (!can_do_mlock())
		goto out;

	lru_add_drain_all();	/* flush pagevec */

	down_write(&current->mm->mmap_sem);

	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = do_mlockall(flags);
	up_write(&current->mm->mmap_sem);
out:
	return ret;
}

SYSCALL_DEFINE0(munlockall)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mlockall(0);
	up_write(&current->mm->mmap_sem);
	return ret;
}
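
/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */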
static DEFINE_SPINLOCK(shmlock_user_lock);

int user_shm_lock(size_t size, struct user_struct *user)
{
	unsigned long lock_limit, locked;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
	if (lock_limit == RLIM_INFINITY)
		allowed = 1;
	lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	if (!allowed &&
	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
		goto out;
	get_uid(user);
	user->locked_shm += locked;
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

void user_shm_unlock(size_t size, struct user_struct *user)
{
	spin_lock(&shmlock_user_lock);
	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	spin_unlock(&shmlock_user_lock);
	free_uid(user);
}
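
/*
 * Allocate a zeroed kernel buffer and charge it against the current
 * task's total_vm and locked_vm, after checking the RLIMIT_AS and
 * RLIMIT_MEMLOCK limits.  Returns NULL if either limit would be
 * exceeded or the allocation fails.
 */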
void *alloc_locked_buffer(size_t size)
{
	unsigned long rlim, vm, pgsz;
	void *buffer = NULL;

	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;

	down_write(&current->mm->mmap_sem);

	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
	vm = current->mm->total_vm + pgsz;
	if (rlim < vm)
		goto out;

	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
	vm = current->mm->locked_vm + pgsz;
	if (rlim < vm)
		goto out;

	buffer = kzalloc(size, GFP_KERNEL);
	if (!buffer)
		goto out;

	current->mm->total_vm += pgsz;
	current->mm->locked_vm += pgsz;

 out:
	up_write(&current->mm->mmap_sem);
	return buffer;
}

void release_locked_buffer(void *buffer, size_t size)
{
	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;

	down_write(&current->mm->mmap_sem);

	current->mm->total_vm -= pgsz;
	current->mm->locked_vm -= pgsz;

	up_write(&current->mm->mmap_sem);
}

void free_locked_buffer(void *buffer, size_t size)
{
	release_locked_buffer(buffer, size);

	kfree(buffer);
}