1
2
3
4
5
6
7
8#include <linux/mman.h>
9#include <linux/pagemap.h>
10#include <linux/syscalls.h>
11#include <linux/mempolicy.h>
12#include <linux/page-isolation.h>
13#include <linux/hugetlb.h>
14#include <linux/falloc.h>
15#include <linux/sched.h>
16#include <linux/ksm.h>
17#include <linux/fs.h>
18#include <linux/file.h>
19
20
21
22
23
24
/*
 * Decide which mode of mmap_sem the madvise() syscall has to take for
 * the given behavior.
 *
 * Returns 0 for MADV_REMOVE, MADV_WILLNEED and MADV_DONTNEED (the caller
 * then takes the semaphore for reading) and 1 for every other value (the
 * caller takes it for writing).  Unknown values deliberately fall into
 * the conservative "write" answer.
 */
static int madvise_need_mmap_write(int behavior)
{
	if (behavior == MADV_REMOVE ||
	    behavior == MADV_WILLNEED ||
	    behavior == MADV_DONTNEED)
		return 0;

	/* Anything else: be safe and ask for the write lock. */
	return 1;
}
37
38
39
40
41
42static long madvise_behavior(struct vm_area_struct * vma,
43 struct vm_area_struct **prev,
44 unsigned long start, unsigned long end, int behavior)
45{
46 struct mm_struct * mm = vma->vm_mm;
47 int error = 0;
48 pgoff_t pgoff;
49 unsigned long new_flags = vma->vm_flags;
50
51 switch (behavior) {
52 case MADV_NORMAL:
53 new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
54 break;
55 case MADV_SEQUENTIAL:
56 new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
57 break;
58 case MADV_RANDOM:
59 new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
60 break;
61 case MADV_DONTFORK:
62 new_flags |= VM_DONTCOPY;
63 break;
64 case MADV_DOFORK:
65 if (vma->vm_flags & VM_IO) {
66 error = -EINVAL;
67 goto out;
68 }
69 new_flags &= ~VM_DONTCOPY;
70 break;
71 case MADV_DONTDUMP:
72 new_flags |= VM_DONTDUMP;
73 break;
74 case MADV_DODUMP:
75 if (new_flags & VM_SPECIAL) {
76 error = -EINVAL;
77 goto out;
78 }
79 new_flags &= ~VM_DONTDUMP;
80 break;
81 case MADV_MERGEABLE:
82 case MADV_UNMERGEABLE:
83 error = ksm_madvise(vma, start, end, behavior, &new_flags);
84 if (error)
85 goto out;
86 break;
87 case MADV_HUGEPAGE:
88 case MADV_NOHUGEPAGE:
89 error = hugepage_madvise(vma, &new_flags, behavior);
90 if (error)
91 goto out;
92 break;
93 }
94
95 if (new_flags == vma->vm_flags) {
96 *prev = vma;
97 goto out;
98 }
99
100 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
101 *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
102 vma->vm_file, pgoff, vma_policy(vma));
103 if (*prev) {
104 vma = *prev;
105 goto success;
106 }
107
108 *prev = vma;
109
110 if (start != vma->vm_start) {
111 error = split_vma(mm, vma, start, 1);
112 if (error)
113 goto out;
114 }
115
116 if (end != vma->vm_end) {
117 error = split_vma(mm, vma, end, 0);
118 if (error)
119 goto out;
120 }
121
122success:
123
124
125
126 vma->vm_flags = new_flags;
127
128out:
129 if (error == -ENOMEM)
130 error = -EAGAIN;
131 return error;
132}
133
134
135
136
137static long madvise_willneed(struct vm_area_struct * vma,
138 struct vm_area_struct ** prev,
139 unsigned long start, unsigned long end)
140{
141 struct file *file = vma->vm_file;
142
143 if (!file)
144 return -EBADF;
145
146 if (file->f_mapping->a_ops->get_xip_mem) {
147
148 return 0;
149 }
150
151 *prev = vma;
152 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
153 if (end > vma->vm_end)
154 end = vma->vm_end;
155 end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
156
157 force_page_cache_readahead(file->f_mapping, file, start, end - start);
158 return 0;
159}
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180static long madvise_dontneed(struct vm_area_struct * vma,
181 struct vm_area_struct ** prev,
182 unsigned long start, unsigned long end)
183{
184 *prev = vma;
185 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
186 return -EINVAL;
187
188 if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
189 struct zap_details details = {
190 .nonlinear_vma = vma,
191 .last_index = ULONG_MAX,
192 };
193 zap_page_range(vma, start, end - start, &details);
194 } else
195 zap_page_range(vma, start, end - start, NULL);
196 return 0;
197}
198
199
200
201
202
203
204
205
206static long madvise_remove(struct vm_area_struct *vma,
207 struct vm_area_struct **prev,
208 unsigned long start, unsigned long end)
209{
210 loff_t offset;
211 int error;
212 struct file *f;
213
214 *prev = NULL;
215
216 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
217 return -EINVAL;
218
219 f = vma->vm_file;
220
221 if (!f || !f->f_mapping || !f->f_mapping->host) {
222 return -EINVAL;
223 }
224
225 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
226 return -EACCES;
227
228 offset = (loff_t)(start - vma->vm_start)
229 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
230
231
232
233
234
235
236
237 get_file(f);
238 up_read(¤t->mm->mmap_sem);
239 error = do_fallocate(f,
240 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
241 offset, end - start);
242 fput(f);
243 down_read(¤t->mm->mmap_sem);
244 return error;
245}
246
247#ifdef CONFIG_MEMORY_FAILURE
248
249
250
251static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
252{
253 int ret = 0;
254
255 if (!capable(CAP_SYS_ADMIN))
256 return -EPERM;
257 for (; start < end; start += PAGE_SIZE) {
258 struct page *p;
259 int ret = get_user_pages_fast(start, 1, 0, &p);
260 if (ret != 1)
261 return ret;
262 if (bhv == MADV_SOFT_OFFLINE) {
263 printk(KERN_INFO "Soft offlining page %lx at %lx\n",
264 page_to_pfn(p), start);
265 ret = soft_offline_page(p, MF_COUNT_INCREASED);
266 if (ret)
267 break;
268 continue;
269 }
270 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
271 page_to_pfn(p), start);
272
273 memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
274 }
275 return ret;
276}
277#endif
278
279static long
280madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
281 unsigned long start, unsigned long end, int behavior)
282{
283 switch (behavior) {
284 case MADV_REMOVE:
285 return madvise_remove(vma, prev, start, end);
286 case MADV_WILLNEED:
287 return madvise_willneed(vma, prev, start, end);
288 case MADV_DONTNEED:
289 return madvise_dontneed(vma, prev, start, end);
290 default:
291 return madvise_behavior(vma, prev, start, end, behavior);
292 }
293}
294
/*
 * Tell whether @behavior is an advice value the madvise() syscall accepts.
 *
 * MADV_MERGEABLE/MADV_UNMERGEABLE are only accepted when CONFIG_KSM is
 * set, MADV_HUGEPAGE/MADV_NOHUGEPAGE only when CONFIG_TRANSPARENT_HUGEPAGE
 * is set.
 *
 * Returns 1 for an accepted value, 0 otherwise.
 */
static int
madvise_behavior_valid(int behavior)
{
	int valid = 0;

	switch (behavior) {
	/* access pattern hints */
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
	/* page-level operations */
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_REMOVE:
	/* fork and core-dump flags */
	case MADV_DONTFORK:
	case MADV_DOFORK:
	case MADV_DONTDUMP:
	case MADV_DODUMP:
#ifdef CONFIG_KSM
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
#endif
		valid = 1;
		break;
	default:
		break;
	}

	return valid;
}
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
367{
368 unsigned long end, tmp;
369 struct vm_area_struct * vma, *prev;
370 int unmapped_error = 0;
371 int error = -EINVAL;
372 int write;
373 size_t len;
374
375#ifdef CONFIG_MEMORY_FAILURE
376 if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
377 return madvise_hwpoison(behavior, start, start+len_in);
378#endif
379 if (!madvise_behavior_valid(behavior))
380 return error;
381
382 write = madvise_need_mmap_write(behavior);
383 if (write)
384 down_write(¤t->mm->mmap_sem);
385 else
386 down_read(¤t->mm->mmap_sem);
387
388 if (start & ~PAGE_MASK)
389 goto out;
390 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
391
392
393 if (len_in && !len)
394 goto out;
395
396 end = start + len;
397 if (end < start)
398 goto out;
399
400 error = 0;
401 if (end == start)
402 goto out;
403
404
405
406
407
408
409 vma = find_vma_prev(current->mm, start, &prev);
410 if (vma && start > vma->vm_start)
411 prev = vma;
412
413 for (;;) {
414
415 error = -ENOMEM;
416 if (!vma)
417 goto out;
418
419
420 if (start < vma->vm_start) {
421 unmapped_error = -ENOMEM;
422 start = vma->vm_start;
423 if (start >= end)
424 goto out;
425 }
426
427
428 tmp = vma->vm_end;
429 if (end < tmp)
430 tmp = end;
431
432
433 error = madvise_vma(vma, &prev, start, tmp, behavior);
434 if (error)
435 goto out;
436 start = tmp;
437 if (prev && start < prev->vm_end)
438 start = prev->vm_end;
439 error = unmapped_error;
440 if (start >= end)
441 goto out;
442 if (prev)
443 vma = prev->vm_next;
444 else
445 vma = find_vma(current->mm, start);
446 }
447out:
448 if (write)
449 up_write(¤t->mm->mmap_sem);
450 else
451 up_read(¤t->mm->mmap_sem);
452
453 return error;
454}
455