1
2
3
4
5
6
7
8#include <linux/mman.h>
9#include <linux/pagemap.h>
10#include <linux/syscalls.h>
11#include <linux/mempolicy.h>
12#include <linux/page-isolation.h>
13#include <linux/hugetlb.h>
14#include <linux/falloc.h>
15#include <linux/sched.h>
16#include <linux/ksm.h>
17#include <linux/fs.h>
18#include <linux/file.h>
19
20
21
22
23
24
/*
 * Tell the madvise() entry point which mode of mmap_sem to take.
 *
 * MADV_REMOVE, MADV_WILLNEED and MADV_DONTNEED yield 0 (the caller then
 * takes the semaphore for reading); every other behavior yields 1 (the
 * caller takes it for writing).
 */
static int madvise_need_mmap_write(int behavior)
{
	int read_mode_ok = behavior == MADV_REMOVE ||
			   behavior == MADV_WILLNEED ||
			   behavior == MADV_DONTNEED;

	return !read_mode_ok;
}
37
38
39
40
41
42static long madvise_behavior(struct vm_area_struct * vma,
43 struct vm_area_struct **prev,
44 unsigned long start, unsigned long end, int behavior)
45{
46 struct mm_struct * mm = vma->vm_mm;
47 int error = 0;
48 pgoff_t pgoff;
49 unsigned long new_flags = vma->vm_flags;
50
51 switch (behavior) {
52 case MADV_NORMAL:
53 new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
54 break;
55 case MADV_SEQUENTIAL:
56 new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
57 break;
58 case MADV_RANDOM:
59 new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
60 break;
61 case MADV_DONTFORK:
62 new_flags |= VM_DONTCOPY;
63 break;
64 case MADV_DOFORK:
65 if (vma->vm_flags & VM_IO) {
66 error = -EINVAL;
67 goto out;
68 }
69 new_flags &= ~VM_DONTCOPY;
70 break;
71 case MADV_DONTDUMP:
72 new_flags |= VM_NODUMP;
73 break;
74 case MADV_DODUMP:
75 new_flags &= ~VM_NODUMP;
76 break;
77 case MADV_MERGEABLE:
78 case MADV_UNMERGEABLE:
79 error = ksm_madvise(vma, start, end, behavior, &new_flags);
80 if (error)
81 goto out;
82 break;
83 case MADV_HUGEPAGE:
84 case MADV_NOHUGEPAGE:
85 error = hugepage_madvise(vma, &new_flags, behavior);
86 if (error)
87 goto out;
88 break;
89 }
90
91 if (new_flags == vma->vm_flags) {
92 *prev = vma;
93 goto out;
94 }
95
96 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
97 *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
98 vma->vm_file, pgoff, vma_policy(vma));
99 if (*prev) {
100 vma = *prev;
101 goto success;
102 }
103
104 *prev = vma;
105
106 if (start != vma->vm_start) {
107 error = split_vma(mm, vma, start, 1);
108 if (error)
109 goto out;
110 }
111
112 if (end != vma->vm_end) {
113 error = split_vma(mm, vma, end, 0);
114 if (error)
115 goto out;
116 }
117
118success:
119
120
121
122 vma->vm_flags = new_flags;
123
124out:
125 if (error == -ENOMEM)
126 error = -EAGAIN;
127 return error;
128}
129
130
131
132
133static long madvise_willneed(struct vm_area_struct * vma,
134 struct vm_area_struct ** prev,
135 unsigned long start, unsigned long end)
136{
137 struct file *file = vma->vm_file;
138
139 if (!file)
140 return -EBADF;
141
142 if (file->f_mapping->a_ops->get_xip_mem) {
143
144 return 0;
145 }
146
147 *prev = vma;
148 start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
149 if (end > vma->vm_end)
150 end = vma->vm_end;
151 end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
152
153 force_page_cache_readahead(file->f_mapping, file, start, end - start);
154 return 0;
155}
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176static long madvise_dontneed(struct vm_area_struct * vma,
177 struct vm_area_struct ** prev,
178 unsigned long start, unsigned long end)
179{
180 *prev = vma;
181 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
182 return -EINVAL;
183
184 if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
185 struct zap_details details = {
186 .nonlinear_vma = vma,
187 .last_index = ULONG_MAX,
188 };
189 zap_page_range(vma, start, end - start, &details);
190 } else
191 zap_page_range(vma, start, end - start, NULL);
192 return 0;
193}
194
195
196
197
198
199
200
201
202static long madvise_remove(struct vm_area_struct *vma,
203 struct vm_area_struct **prev,
204 unsigned long start, unsigned long end)
205{
206 loff_t offset;
207 int error;
208 struct file *f;
209
210 *prev = NULL;
211
212 if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
213 return -EINVAL;
214
215 f = vma->vm_file;
216
217 if (!f || !f->f_mapping || !f->f_mapping->host) {
218 return -EINVAL;
219 }
220
221 if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE))
222 return -EACCES;
223
224 offset = (loff_t)(start - vma->vm_start)
225 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
226
227
228
229
230
231
232
233 get_file(f);
234 up_read(¤t->mm->mmap_sem);
235 error = do_fallocate(f,
236 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
237 offset, end - start);
238 fput(f);
239 down_read(¤t->mm->mmap_sem);
240 return error;
241}
242
243#ifdef CONFIG_MEMORY_FAILURE
244
245
246
247static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
248{
249 int ret = 0;
250
251 if (!capable(CAP_SYS_ADMIN))
252 return -EPERM;
253 for (; start < end; start += PAGE_SIZE) {
254 struct page *p;
255 int ret = get_user_pages_fast(start, 1, 0, &p);
256 if (ret != 1)
257 return ret;
258 if (bhv == MADV_SOFT_OFFLINE) {
259 printk(KERN_INFO "Soft offlining page %lx at %lx\n",
260 page_to_pfn(p), start);
261 ret = soft_offline_page(p, MF_COUNT_INCREASED);
262 if (ret)
263 break;
264 continue;
265 }
266 printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
267 page_to_pfn(p), start);
268
269 memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
270 }
271 return ret;
272}
273#endif
274
275static long
276madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
277 unsigned long start, unsigned long end, int behavior)
278{
279 switch (behavior) {
280 case MADV_REMOVE:
281 return madvise_remove(vma, prev, start, end);
282 case MADV_WILLNEED:
283 return madvise_willneed(vma, prev, start, end);
284 case MADV_DONTNEED:
285 return madvise_dontneed(vma, prev, start, end);
286 default:
287 return madvise_behavior(vma, prev, start, end, behavior);
288 }
289}
290
/*
 * Report whether @behavior is an advice value the madvise() entry point
 * accepts: 1 if so, 0 otherwise.
 *
 * The KSM and transparent-hugepage values are only recognised when the
 * corresponding CONFIG_ option is enabled.
 */
static int
madvise_behavior_valid(int behavior)
{
	int recognised = 0;

	switch (behavior) {
	case MADV_NORMAL:
	case MADV_RANDOM:
	case MADV_SEQUENTIAL:
	case MADV_WILLNEED:
	case MADV_DONTNEED:
	case MADV_REMOVE:
	case MADV_DONTFORK:
	case MADV_DOFORK:
	case MADV_DONTDUMP:
	case MADV_DODUMP:
#ifdef CONFIG_KSM
	case MADV_MERGEABLE:
	case MADV_UNMERGEABLE:
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	case MADV_HUGEPAGE:
	case MADV_NOHUGEPAGE:
#endif
		recognised = 1;
		break;
	default:
		break;
	}

	return recognised;
}
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
363{
364 unsigned long end, tmp;
365 struct vm_area_struct * vma, *prev;
366 int unmapped_error = 0;
367 int error = -EINVAL;
368 int write;
369 size_t len;
370
371#ifdef CONFIG_MEMORY_FAILURE
372 if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
373 return madvise_hwpoison(behavior, start, start+len_in);
374#endif
375 if (!madvise_behavior_valid(behavior))
376 return error;
377
378 write = madvise_need_mmap_write(behavior);
379 if (write)
380 down_write(¤t->mm->mmap_sem);
381 else
382 down_read(¤t->mm->mmap_sem);
383
384 if (start & ~PAGE_MASK)
385 goto out;
386 len = (len_in + ~PAGE_MASK) & PAGE_MASK;
387
388
389 if (len_in && !len)
390 goto out;
391
392 end = start + len;
393 if (end < start)
394 goto out;
395
396 error = 0;
397 if (end == start)
398 goto out;
399
400
401
402
403
404
405 vma = find_vma_prev(current->mm, start, &prev);
406 if (vma && start > vma->vm_start)
407 prev = vma;
408
409 for (;;) {
410
411 error = -ENOMEM;
412 if (!vma)
413 goto out;
414
415
416 if (start < vma->vm_start) {
417 unmapped_error = -ENOMEM;
418 start = vma->vm_start;
419 if (start >= end)
420 goto out;
421 }
422
423
424 tmp = vma->vm_end;
425 if (end < tmp)
426 tmp = end;
427
428
429 error = madvise_vma(vma, &prev, start, tmp, behavior);
430 if (error)
431 goto out;
432 start = tmp;
433 if (prev && start < prev->vm_end)
434 start = prev->vm_end;
435 error = unmapped_error;
436 if (start >= end)
437 goto out;
438 if (prev)
439 vma = prev->vm_next;
440 else
441 vma = find_vma(current->mm, start);
442 }
443out:
444 if (write)
445 up_write(¤t->mm->mmap_sem);
446 else
447 up_read(¤t->mm->mmap_sem);
448
449 return error;
450}
451