1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33#define DEBUG 1
34#include <linux/kernel.h>
35#include <linux/mm.h>
36#include <linux/page-flags.h>
37#include <linux/kernel-page-flags.h>
38#include <linux/sched.h>
39#include <linux/ksm.h>
40#include <linux/rmap.h>
41#include <linux/pagemap.h>
42#include <linux/swap.h>
43#include <linux/backing-dev.h>
44#include <linux/migrate.h>
45#include <linux/page-isolation.h>
46#include <linux/suspend.h>
47#include <linux/slab.h>
48#include <linux/swapops.h>
49#include <linux/hugetlb.h>
50#include "internal.h"
51
/*
 * Default early-kill policy: task_early_kill() returns this value for any
 * task with an mm that has not set PF_MCE_PROCESS.  Off (0) by default.
 */
int sysctl_memory_failure_early_kill __read_mostly = 0;

/*
 * When zero, __memory_failure() panics immediately instead of attempting
 * any recovery.  On (1) by default.
 */
int sysctl_memory_failure_recovery __read_mostly = 1;

/*
 * Running count of pages this file has marked hardware-poisoned; raised in
 * __memory_failure()/soft_offline_page() and lowered again on unpoison.
 */
atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
57
58#if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
59
/* Master switch: hwpoison_filter() accepts every page while this is 0. */
u32 hwpoison_filter_enable = 0;
/* Device major/minor to match; ~0U means "do not filter on this field". */
u32 hwpoison_filter_dev_major = ~0U;
u32 hwpoison_filter_dev_minor = ~0U;
/*
 * Page-flag filter: a page passes when
 * (stable_page_flags(page) & mask) == value.  A zero mask disables the check.
 */
u64 hwpoison_filter_flags_mask;
u64 hwpoison_filter_flags_value;
EXPORT_SYMBOL_GPL(hwpoison_filter_enable);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_mask);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);
70
71static int hwpoison_filter_dev(struct page *p)
72{
73 struct address_space *mapping;
74 dev_t dev;
75
76 if (hwpoison_filter_dev_major == ~0U &&
77 hwpoison_filter_dev_minor == ~0U)
78 return 0;
79
80
81
82
83 if (PageSlab(p))
84 return -EINVAL;
85
86 mapping = page_mapping(p);
87 if (mapping == NULL || mapping->host == NULL)
88 return -EINVAL;
89
90 dev = mapping->host->i_sb->s_dev;
91 if (hwpoison_filter_dev_major != ~0U &&
92 hwpoison_filter_dev_major != MAJOR(dev))
93 return -EINVAL;
94 if (hwpoison_filter_dev_minor != ~0U &&
95 hwpoison_filter_dev_minor != MINOR(dev))
96 return -EINVAL;
97
98 return 0;
99}
100
101static int hwpoison_filter_flags(struct page *p)
102{
103 if (!hwpoison_filter_flags_mask)
104 return 0;
105
106 if ((stable_page_flags(p) & hwpoison_filter_flags_mask) ==
107 hwpoison_filter_flags_value)
108 return 0;
109 else
110 return -EINVAL;
111}
112
113
114
115
116
117
118
119
120
121
122
123#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
 * Inode number that hwpoison_filter_task() compares against the inode of
 * the page's memory cgroup directory.  Zero disables the memcg filter.
 */
u64 hwpoison_filter_memcg;
EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
126static int hwpoison_filter_task(struct page *p)
127{
128 struct mem_cgroup *mem;
129 struct cgroup_subsys_state *css;
130 unsigned long ino;
131
132 if (!hwpoison_filter_memcg)
133 return 0;
134
135 mem = try_get_mem_cgroup_from_page(p);
136 if (!mem)
137 return -EINVAL;
138
139 css = mem_cgroup_css(mem);
140
141 if (!css->cgroup->dentry)
142 return -EINVAL;
143
144 ino = css->cgroup->dentry->d_inode->i_ino;
145 css_put(css);
146
147 if (ino != hwpoison_filter_memcg)
148 return -EINVAL;
149
150 return 0;
151}
152#else
/* Memcg filtering is compiled out: every page passes the task filter. */
static int hwpoison_filter_task(struct page *p)
{
	return 0;
}
154#endif
155
156int hwpoison_filter(struct page *p)
157{
158 if (!hwpoison_filter_enable)
159 return 0;
160
161 if (hwpoison_filter_dev(p))
162 return -EINVAL;
163
164 if (hwpoison_filter_flags(p))
165 return -EINVAL;
166
167 if (hwpoison_filter_task(p))
168 return -EINVAL;
169
170 return 0;
171}
172#else
/*
 * Stub used when the hwpoison injector is not configured: no filtering is
 * available, so every page is accepted (returns 0).
 */
int hwpoison_filter(struct page *p)
{
	return 0;
}
177#endif
178
179EXPORT_SYMBOL_GPL(hwpoison_filter);
180
181
182
183
184
185static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
186 unsigned long pfn, struct page *page)
187{
188 struct siginfo si;
189 int ret;
190
191 printk(KERN_ERR
192 "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
193 pfn, t->comm, t->pid);
194 si.si_signo = SIGBUS;
195 si.si_errno = 0;
196 si.si_code = BUS_MCEERR_AO;
197 si.si_addr = (void *)addr;
198#ifdef __ARCH_SI_TRAPNO
199 si.si_trapno = trapno;
200#endif
201 si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
202
203
204
205
206
207
208 ret = send_sig_info(SIGBUS, &si, t);
209 if (ret < 0)
210 printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
211 t->comm, t->pid, ret);
212 return ret;
213}
214
215
216
217
218
219void shake_page(struct page *p, int access)
220{
221 if (!PageSlab(p)) {
222 lru_add_drain_all();
223 if (PageLRU(p))
224 return;
225 drain_all_pages();
226 if (PageLRU(p) || is_free_buddy_page(p))
227 return;
228 }
229
230
231
232
233
234 if (access) {
235 int nr;
236 do {
237 nr = shrink_slab(1000, GFP_KERNEL, 1000);
238 if (page_count(p) == 1)
239 break;
240 } while (nr > 10);
241 }
242}
243EXPORT_SYMBOL_GPL(shake_page);
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/*
 * One task scheduled for signalling because it maps a poisoned page.
 * Entries are created by add_to_kill() and consumed by kill_procs_ao().
 */
struct to_kill {
	struct list_head nd;		/* linkage on the caller's kill list */
	struct task_struct *tsk;	/* target task; a reference is held */
	unsigned long addr;		/* result of page_address_in_vma() */
	unsigned addr_valid:1;		/* 0 when addr is -EFAULT */
};
273
274
275
276
277
278
279
280
281
282
283
284static void add_to_kill(struct task_struct *tsk, struct page *p,
285 struct vm_area_struct *vma,
286 struct list_head *to_kill,
287 struct to_kill **tkc)
288{
289 struct to_kill *tk;
290
291 if (*tkc) {
292 tk = *tkc;
293 *tkc = NULL;
294 } else {
295 tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
296 if (!tk) {
297 printk(KERN_ERR
298 "MCE: Out of memory while machine check handling\n");
299 return;
300 }
301 }
302 tk->addr = page_address_in_vma(p, vma);
303 tk->addr_valid = 1;
304
305
306
307
308
309
310
311 if (tk->addr == -EFAULT) {
312 pr_debug("MCE: Unable to find user space address %lx in %s\n",
313 page_to_pfn(p), tsk->comm);
314 tk->addr_valid = 0;
315 }
316 get_task_struct(tsk);
317 tk->tsk = tsk;
318 list_add_tail(&tk->nd, to_kill);
319}
320
321
322
323
324
325
326
327
328
329static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
330 int fail, struct page *page, unsigned long pfn)
331{
332 struct to_kill *tk, *next;
333
334 list_for_each_entry_safe (tk, next, to_kill, nd) {
335 if (doit) {
336
337
338
339
340
341 if (fail || tk->addr_valid == 0) {
342 printk(KERN_ERR
343 "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
344 pfn, tk->tsk->comm, tk->tsk->pid);
345 force_sig(SIGKILL, tk->tsk);
346 }
347
348
349
350
351
352
353
354 else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
355 pfn, page) < 0)
356 printk(KERN_ERR
357 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
358 pfn, tk->tsk->comm, tk->tsk->pid);
359 }
360 put_task_struct(tk->tsk);
361 kfree(tk);
362 }
363}
364
365static int task_early_kill(struct task_struct *tsk)
366{
367 if (!tsk->mm)
368 return 0;
369 if (tsk->flags & PF_MCE_PROCESS)
370 return !!(tsk->flags & PF_MCE_EARLY);
371 return sysctl_memory_failure_early_kill;
372}
373
374
375
376
377static void collect_procs_anon(struct page *page, struct list_head *to_kill,
378 struct to_kill **tkc)
379{
380 struct vm_area_struct *vma;
381 struct task_struct *tsk;
382 struct anon_vma *av;
383
384 read_lock(&tasklist_lock);
385 av = page_lock_anon_vma(page);
386 if (av == NULL)
387 goto out;
388 for_each_process (tsk) {
389 struct anon_vma_chain *vmac;
390
391 if (!task_early_kill(tsk))
392 continue;
393 list_for_each_entry(vmac, &av->head, same_anon_vma) {
394 vma = vmac->vma;
395 if (!page_mapped_in_vma(page, vma))
396 continue;
397 if (vma->vm_mm == tsk->mm)
398 add_to_kill(tsk, page, vma, to_kill, tkc);
399 }
400 }
401 page_unlock_anon_vma(av);
402out:
403 read_unlock(&tasklist_lock);
404}
405
406
407
408
409static void collect_procs_file(struct page *page, struct list_head *to_kill,
410 struct to_kill **tkc)
411{
412 struct vm_area_struct *vma;
413 struct task_struct *tsk;
414 struct prio_tree_iter iter;
415 struct address_space *mapping = page->mapping;
416
417
418
419
420
421
422
423
424
425
426 read_lock(&tasklist_lock);
427 spin_lock(&mapping->i_mmap_lock);
428 for_each_process(tsk) {
429 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
430
431 if (!task_early_kill(tsk))
432 continue;
433
434 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
435 pgoff) {
436
437
438
439
440
441
442
443 if (vma->vm_mm == tsk->mm)
444 add_to_kill(tsk, page, vma, to_kill, tkc);
445 }
446 }
447 spin_unlock(&mapping->i_mmap_lock);
448 read_unlock(&tasklist_lock);
449}
450
451
452
453
454
455
456
457static void collect_procs(struct page *page, struct list_head *tokill)
458{
459 struct to_kill *tk;
460
461 if (!page->mapping)
462 return;
463
464 tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
465 if (!tk)
466 return;
467 if (PageAnon(page))
468 collect_procs_anon(page, tokill, &tk);
469 else
470 collect_procs_file(page, tokill, &tk);
471 kfree(tk);
472}
473
474
475
476
477
/*
 * Result codes returned by the me_*() page handlers.  page_action() maps
 * RECOVERED and DELAYED to success (0) and everything else to -EBUSY.
 */
enum outcome {
	IGNORED,
	FAILED,
	DELAYED,
	RECOVERED,
};

/* Printable names for enum outcome, indexed by the enum value. */
static const char *action_name[] = {
	[IGNORED] = "Ignored",
	[FAILED] = "Failed",
	[DELAYED] = "Delayed",
	[RECOVERED] = "Recovered",
};
491
492
493
494
495
496
497
498static int delete_from_lru_cache(struct page *p)
499{
500 if (!isolate_lru_page(p)) {
501
502
503
504
505 ClearPageActive(p);
506 ClearPageUnevictable(p);
507
508
509
510 page_cache_release(p);
511 return 0;
512 }
513 return -EIO;
514}
515
516
517
518
519
520
/*
 * Handler for page states mapped to it in error_states[] ("reserved
 * kernel", "kernel slab"): nothing is attempted and IGNORED is returned.
 */
static int me_kernel(struct page *p, unsigned long pfn)
{
	return IGNORED;
}
525
526
527
528
/*
 * Catch-all handler (last entry of error_states[]): logs that the page
 * state was not recognised and reports FAILED.
 */
static int me_unknown(struct page *p, unsigned long pfn)
{
	printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
	return FAILED;
}
534
535
536
537
538static int me_pagecache_clean(struct page *p, unsigned long pfn)
539{
540 int err;
541 int ret = FAILED;
542 struct address_space *mapping;
543
544 delete_from_lru_cache(p);
545
546
547
548
549
550 if (PageAnon(p))
551 return RECOVERED;
552
553
554
555
556
557
558
559
560 mapping = page_mapping(p);
561 if (!mapping) {
562
563
564
565 return FAILED;
566 }
567
568
569
570
571
572
573 if (mapping->a_ops->error_remove_page) {
574 err = mapping->a_ops->error_remove_page(mapping, p);
575 if (err != 0) {
576 printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
577 pfn, err);
578 } else if (page_has_private(p) &&
579 !try_to_release_page(p, GFP_NOIO)) {
580 pr_debug("MCE %#lx: failed to release buffers\n", pfn);
581 } else {
582 ret = RECOVERED;
583 }
584 } else {
585
586
587
588
589 if (invalidate_inode_page(p))
590 ret = RECOVERED;
591 else
592 printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
593 pfn);
594 }
595 return ret;
596}
597
598
599
600
601
602
603static int me_pagecache_dirty(struct page *p, unsigned long pfn)
604{
605 struct address_space *mapping = page_mapping(p);
606
607 SetPageError(p);
608
609 if (mapping) {
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644 mapping_set_error(mapping, EIO);
645 }
646
647 return me_pagecache_clean(p, pfn);
648}
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669static int me_swapcache_dirty(struct page *p, unsigned long pfn)
670{
671 ClearPageDirty(p);
672
673 ClearPageUptodate(p);
674
675 if (!delete_from_lru_cache(p))
676 return DELAYED;
677 else
678 return FAILED;
679}
680
681static int me_swapcache_clean(struct page *p, unsigned long pfn)
682{
683 delete_from_swap_cache(p);
684
685 if (!delete_from_lru_cache(p))
686 return RECOVERED;
687 else
688 return FAILED;
689}
690
691
692
693
694
695
696
697
698
699static int me_huge_page(struct page *p, unsigned long pfn)
700{
701 struct page *hpage = compound_head(p);
702
703
704
705
706
707
708
709
710
711
712 if (!(page_mapping(hpage) || PageAnon(hpage))) {
713 __isolate_hwpoisoned_huge_page(hpage);
714 return RECOVERED;
715 }
716 return DELAYED;
717}
718
719
720
721
722
723
724
725
726
727
728
729
730
731
/*
 * Short aliases for single page-flag bits, used only to keep the table
 * below readable; they are #undef'd again right after it.
 */
#define dirty		(1UL << PG_dirty)
#define sc		(1UL << PG_swapcache)
#define unevict		(1UL << PG_unevictable)
#define mlock		(1UL << PG_mlocked)
#define writeback	(1UL << PG_writeback)
#define lru		(1UL << PG_lru)
#define swapbacked	(1UL << PG_swapbacked)
#define head		(1UL << PG_head)
#define tail		(1UL << PG_tail)
#define compound	(1UL << PG_compound)
#define slab		(1UL << PG_slab)
#define reserved	(1UL << PG_reserved)

/*
 * Page-state dispatch table.  __memory_failure() scans it from the top and
 * runs the action of the FIRST entry for which
 * (page->flags & mask) == res, so the order of the entries matters: more
 * specific states must come before more general ones.
 *
 * @mask:   flag bits that are examined.
 * @res:    value those bits must have for the entry to match.
 * @msg:    state name used in log messages.
 * @action: handler; returns an enum outcome value.
 */
static struct page_state {
	unsigned long mask;
	unsigned long res;
	char *msg;
	int (*action)(struct page *p, unsigned long pfn);
} error_states[] = {
	{ reserved,	reserved,	"reserved kernel",	me_kernel },

	{ slab,		slab,		"kernel slab",	me_kernel },

#ifdef CONFIG_PAGEFLAGS_EXTENDED
	{ head,		head,		"huge",		me_huge_page },
	{ tail,		tail,		"huge",		me_huge_page },
#else
	{ compound,	compound,	"huge",		me_huge_page },
#endif

	{ sc|dirty,	sc|dirty,	"swapcache",	me_swapcache_dirty },
	{ sc|dirty,	sc,		"swapcache",	me_swapcache_clean },

	{ unevict|dirty, unevict|dirty,	"unevictable LRU", me_pagecache_dirty},
	{ unevict,	unevict,	"unevictable LRU", me_pagecache_clean},

	{ mlock|dirty,	mlock|dirty,	"mlocked LRU",	me_pagecache_dirty },
	{ mlock,	mlock,		"mlocked LRU",	me_pagecache_clean },

	{ lru|dirty,	lru|dirty,	"LRU",		me_pagecache_dirty },
	{ lru|dirty,	lru,		"clean LRU",	me_pagecache_clean },

	/*
	 * Catch-all: a zero mask matches every page, which also guarantees
	 * that the unbounded table scan terminates.
	 */
	{ 0,		0,		"unknown page state",	me_unknown },
};

#undef dirty
#undef sc
#undef unevict
#undef mlock
#undef writeback
#undef lru
#undef swapbacked
#undef head
#undef tail
#undef compound
#undef slab
#undef reserved
801
802static void action_result(unsigned long pfn, char *msg, int result)
803{
804 struct page *page = pfn_to_page(pfn);
805
806 printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
807 pfn,
808 PageDirty(page) ? "dirty " : "",
809 msg, action_name[result]);
810}
811
812static int page_action(struct page_state *ps, struct page *p,
813 unsigned long pfn)
814{
815 int result;
816 int count;
817
818 result = ps->action(p, pfn);
819 action_result(pfn, ps->msg, result);
820
821 count = page_count(p) - 1;
822 if (ps->action == me_swapcache_dirty && result == DELAYED)
823 count--;
824 if (count != 0) {
825 printk(KERN_ERR
826 "MCE %#lx: %s page still referenced by %d users\n",
827 pfn, ps->msg, count);
828 result = FAILED;
829 }
830
831
832
833
834
835
836 return (result == RECOVERED || result == DELAYED) ? 0 : -EBUSY;
837}
838
/* Maximum number of try_to_unmap() attempts in hwpoison_user_mappings(). */
#define N_UNMAP_TRIES 5

/*
 * Remove poisoned page @p from all user mappings and signal the tasks
 * that were mapping it.
 *
 * @p:      the poisoned page; most checks operate on its compound head.
 * @pfn:    page frame number, used for log messages and the kill helper.
 * @trapno: forwarded to kill_procs_ao().
 *
 * Returns SWAP_SUCCESS when there was nothing to unmap or unmapping
 * succeeded, SWAP_FAIL for KSM pages, otherwise the result of the last
 * try_to_unmap() attempt.
 */
static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
				  int trapno)
{
	enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
	struct address_space *mapping;
	LIST_HEAD(tokill);
	int ret;
	int i;
	int kill = 1;
	struct page *hpage = compound_head(p);

	/* Reserved and slab pages are treated as having no user mappings. */
	if (PageReserved(p) || PageSlab(p))
		return SWAP_SUCCESS;

	/* Nothing to do when the (head) page is not mapped anywhere. */
	if (!page_mapped(hpage))
		return SWAP_SUCCESS;

	/* KSM pages are not handled here. */
	if (PageKsm(p))
		return SWAP_FAIL;

	if (PageSwapCache(p)) {
		printk(KERN_ERR
		       "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
		ttu |= TTU_IGNORE_HWPOISON;
	}

	/*
	 * The Dirty page flag alone is not trusted: for a page that looks
	 * clean and whose mapping supports dirty writeback, page_mkclean()
	 * is used to find out whether any pte was dirty.  If so the page is
	 * marked dirty; if not, the page is genuinely clean and can be
	 * dropped without killing anyone.
	 */
	mapping = page_mapping(hpage);
	if (!PageDirty(hpage) && mapping &&
	    mapping_cap_writeback_dirty(mapping)) {
		if (page_mkclean(hpage)) {
			SetPageDirty(hpage);
		} else {
			kill = 0;
			ttu |= TTU_IGNORE_HWPOISON;
			printk(KERN_INFO
	"MCE %#lx: corrupted page was clean: dropped without side effects\n",
				pfn);
		}
	}

	/*
	 * The task list has to be built before unmapping: the collectors
	 * find tasks through the page's mappings.
	 */
	if (kill)
		collect_procs(hpage, &tokill);

	/* Retry unmapping a bounded number of times. */
	for (i = 0; i < N_UNMAP_TRIES; i++) {
		ret = try_to_unmap(hpage, ttu);
		if (ret == SWAP_SUCCESS)
			break;
		pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn,  ret);
	}

	if (ret != SWAP_SUCCESS)
		printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
				pfn, page_mapcount(hpage));

	/*
	 * Signals are only sent when the page is dirty (second argument);
	 * otherwise the list is just released.  A failed unmap (fourth
	 * argument) makes kill_procs_ao() use SIGKILL.
	 */
	kill_procs_ao(&tokill, !!PageDirty(hpage), trapno,
		      ret != SWAP_SUCCESS, p, pfn);

	return ret;
}
935
936static void set_page_hwpoison_huge_page(struct page *hpage)
937{
938 int i;
939 int nr_pages = 1 << compound_order(hpage);
940 for (i = 0; i < nr_pages; i++)
941 SetPageHWPoison(hpage + i);
942}
943
944static void clear_page_hwpoison_huge_page(struct page *hpage)
945{
946 int i;
947 int nr_pages = 1 << compound_order(hpage);
948 for (i = 0; i < nr_pages; i++)
949 ClearPageHWPoison(hpage + i);
950}
951
/*
 * Handle a hardware memory error on page frame @pfn.
 *
 * @pfn:    page frame number of the corrupted page.
 * @trapno: trap number, used in the panic message and forwarded to the
 *          signal code.
 * @flags:  MF_COUNT_INCREASED means a page reference was already taken
 *          for this call, so none is acquired here.
 *
 * Marks the page HWPoison, unmaps it from user space, and dispatches to
 * the matching handler in error_states[].
 *
 * Returns 0 on success or when nothing had to be done, -ENXIO for an
 * invalid pfn, -EBUSY when the page could not be handled.  Panics when
 * sysctl_memory_failure_recovery is 0.
 */
int __memory_failure(unsigned long pfn, int trapno, int flags)
{
	struct page_state *ps;
	struct page *p;
	struct page *hpage;
	int res;
	unsigned int nr_pages;

	if (!sysctl_memory_failure_recovery)
		panic("Memory failure from trap %d on page %lx", trapno, pfn);

	if (!pfn_valid(pfn)) {
		printk(KERN_ERR
		       "MCE %#lx: memory outside kernel control\n",
		       pfn);
		return -ENXIO;
	}

	p = pfn_to_page(pfn);
	hpage = compound_head(p);
	/* The atomic test-and-set makes sure only one caller proceeds. */
	if (TestSetPageHWPoison(p)) {
		printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
		return 0;
	}

	/* Account the whole compound page as bad. */
	nr_pages = 1 << compound_order(hpage);
	atomic_long_add(nr_pages, &mce_bad_pages);

	/*
	 * Take a reference on the head page unless the caller already did.
	 * Failing to get one means the page count was zero: the page is
	 * either free in the buddy allocator or in some state that is not
	 * handled.
	 */
	if (!(flags & MF_COUNT_INCREASED) &&
		!get_page_unless_zero(hpage)) {
		if (is_free_buddy_page(p)) {
			action_result(pfn, "free buddy", DELAYED);
			return 0;
		} else {
			action_result(pfn, "high order kernel", IGNORED);
			return -EBUSY;
		}
	}

	/*
	 * Pages that are neither on the LRU nor huge get one shake_page()
	 * attempt; if that does not help they are given up on.
	 * NOTE(review): the "free buddy, 2nd try" exit returns without the
	 * put_page() that the "non LRU" exit performs -- confirm intended.
	 */
	if (!PageLRU(p) && !PageHuge(p))
		shake_page(p, 0);
	if (!PageLRU(p) && !PageHuge(p)) {
		if (is_free_buddy_page(p)) {
			action_result(pfn, "free buddy, 2nd try", DELAYED);
			return 0;
		}
		action_result(pfn, "non LRU", IGNORED);
		put_page(p);
		return -EBUSY;
	}

	/* Everything below runs with the head page locked. */
	lock_page_nosync(hpage);

	/*
	 * The poison flag may have been cleared again while waiting for
	 * the lock.
	 * NOTE(review): this exit goes to "out", which unlocks but does not
	 * drop the page reference or adjust mce_bad_pages -- confirm.
	 */
	if (!PageHWPoison(p)) {
		printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
		res = 0;
		goto out;
	}
	/* Injector filter rejected the page: undo flag, count, lock, ref. */
	if (hwpoison_filter(p)) {
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		unlock_page(hpage);
		put_page(hpage);
		return 0;
	}

	/*
	 * For an error in a tail page, the head page is used as the marker
	 * for "this compound page was already handled".
	 */
	if (PageTail(p) && TestSetPageHWPoison(hpage)) {
		action_result(pfn, "hugepage already hardware poisoned",
				IGNORED);
		unlock_page(hpage);
		put_page(hpage);
		return 0;
	}

	/* Propagate the poison flag to every page of a huge page. */
	if (PageHuge(p))
		set_page_hwpoison_huge_page(hpage);

	wait_on_page_writeback(p);

	/* Get the page out of all user mappings before handling it. */
	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
		res = -EBUSY;
		goto out;
	}

	/* An LRU page with no mapping that is not swap cache: truncated. */
	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
		action_result(pfn, "already truncated LRU", IGNORED);
		res = -EBUSY;
		goto out;
	}

	/*
	 * Dispatch on the page flags.  The loop has no bound of its own; it
	 * ends because the last error_states[] entry matches every page.
	 */
	res = -EBUSY;
	for (ps = error_states;; ps++) {
		if ((p->flags & ps->mask) == ps->res) {
			res = page_action(ps, p, pfn);
			break;
		}
	}
out:
	unlock_page(hpage);
	return res;
}
EXPORT_SYMBOL_GPL(__memory_failure);
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
/*
 * Report a hardware memory error on page frame @pfn.
 *
 * Thin wrapper around __memory_failure() with no flags set; the result of
 * the recovery attempt is not reported to the caller.
 */
void memory_failure(unsigned long pfn, int trapno)
{
	(void)__memory_failure(pfn, trapno, 0);
}
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
/*
 * Clear the HWPoison state of page frame @pfn again.
 *
 * Returns -ENXIO for an invalid pfn and 0 in every other case, including
 * when the page was not poisoned to begin with.
 */
int unpoison_memory(unsigned long pfn)
{
	struct page *page;
	struct page *p;
	int freeit = 0;
	unsigned int nr_pages;

	if (!pfn_valid(pfn))
		return -ENXIO;

	p = pfn_to_page(pfn);
	page = compound_head(p);	/* locking and refcounting use the head */

	if (!PageHWPoison(p)) {
		pr_debug("MCE: Page was already unpoisoned %#lx\n", pfn);
		return 0;
	}

	nr_pages = 1 << compound_order(page);

	/*
	 * A page with a zero count cannot be pinned; just clear the flag
	 * and fix up the bad-page counter.
	 */
	if (!get_page_unless_zero(page)) {
		if (TestClearPageHWPoison(p))
			atomic_long_sub(nr_pages, &mce_bad_pages);
		pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn);
		return 0;
	}

	lock_page_nosync(page);
	/*
	 * Clear the flag under the page lock.  Only the caller that actually
	 * cleared it adjusts the counter and, below, drops a second page
	 * reference (presumably the one kept since the page was poisoned --
	 * TODO confirm).
	 */
	if (TestClearPageHWPoison(page)) {
		pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn);
		atomic_long_sub(nr_pages, &mce_bad_pages);
		freeit = 1;
	}
	/* Huge pages carry the flag on every sub-page. */
	if (PageHuge(p))
		clear_page_hwpoison_huge_page(page);
	unlock_page(page);

	/* Drop the reference taken above ... */
	put_page(page);
	/* ... and one more if this call cleared the poison flag. */
	if (freeit)
		put_page(page);

	return 0;
}
EXPORT_SYMBOL(unpoison_memory);
1186
1187static struct page *new_page(struct page *p, unsigned long private, int **x)
1188{
1189 int nid = page_to_nid(p);
1190 return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
1191}
1192
1193
1194
1195
1196
1197
1198
1199static int get_any_page(struct page *p, unsigned long pfn, int flags)
1200{
1201 int ret;
1202
1203 if (flags & MF_COUNT_INCREASED)
1204 return 1;
1205
1206
1207
1208
1209
1210
1211 lock_system_sleep();
1212
1213
1214
1215
1216
1217 set_migratetype_isolate(p);
1218 if (!get_page_unless_zero(compound_head(p))) {
1219 if (is_free_buddy_page(p)) {
1220 pr_debug("get_any_page: %#lx free buddy page\n", pfn);
1221
1222 SetPageHWPoison(p);
1223 ret = 0;
1224 } else {
1225 pr_debug("get_any_page: %#lx: unknown zero refcount page type %lx\n",
1226 pfn, p->flags);
1227 ret = -EIO;
1228 }
1229 } else {
1230
1231 ret = 1;
1232 }
1233 unset_migratetype_isolate(p);
1234 unlock_system_sleep();
1235 return ret;
1236}
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260int soft_offline_page(struct page *page, int flags)
1261{
1262 int ret;
1263 unsigned long pfn = page_to_pfn(page);
1264
1265 ret = get_any_page(page, pfn, flags);
1266 if (ret < 0)
1267 return ret;
1268 if (ret == 0)
1269 goto done;
1270
1271
1272
1273
1274 if (!PageLRU(page)) {
1275
1276
1277
1278 put_page(page);
1279 shake_page(page, 1);
1280
1281
1282
1283
1284 ret = get_any_page(page, pfn, 0);
1285 if (ret < 0)
1286 return ret;
1287 if (ret == 0)
1288 goto done;
1289 }
1290 if (!PageLRU(page)) {
1291 pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n",
1292 pfn, page->flags);
1293 return -EIO;
1294 }
1295
1296 lock_page(page);
1297 wait_on_page_writeback(page);
1298
1299
1300
1301
1302 if (PageHWPoison(page)) {
1303 unlock_page(page);
1304 put_page(page);
1305 pr_debug("soft offline: %#lx page already poisoned\n", pfn);
1306 return -EBUSY;
1307 }
1308
1309
1310
1311
1312
1313 ret = invalidate_inode_page(page);
1314 unlock_page(page);
1315
1316
1317
1318
1319
1320
1321
1322
1323 put_page(page);
1324 if (ret == 1) {
1325 ret = 0;
1326 pr_debug("soft_offline: %#lx: invalidated\n", pfn);
1327 goto done;
1328 }
1329
1330
1331
1332
1333
1334
1335 ret = isolate_lru_page(page);
1336 if (!ret) {
1337 LIST_HEAD(pagelist);
1338
1339 list_add(&page->lru, &pagelist);
1340 ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
1341 if (ret) {
1342 pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
1343 pfn, ret, page->flags);
1344 if (ret > 0)
1345 ret = -EIO;
1346 }
1347 } else {
1348 pr_debug("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
1349 pfn, ret, page_count(page), page->flags);
1350 }
1351 if (ret)
1352 return ret;
1353
1354done:
1355 atomic_long_add(1, &mce_bad_pages);
1356 SetPageHWPoison(page);
1357
1358 return ret;
1359}
1360
1361
1362
1363
1364int is_hwpoison_address(unsigned long addr)
1365{
1366 pgd_t *pgdp;
1367 pud_t pud, *pudp;
1368 pmd_t pmd, *pmdp;
1369 pte_t pte, *ptep;
1370 swp_entry_t entry;
1371
1372 pgdp = pgd_offset(current->mm, addr);
1373 if (!pgd_present(*pgdp))
1374 return 0;
1375 pudp = pud_offset(pgdp, addr);
1376 pud = *pudp;
1377 if (!pud_present(pud) || pud_large(pud))
1378 return 0;
1379 pmdp = pmd_offset(pudp, addr);
1380 pmd = *pmdp;
1381 if (!pmd_present(pmd) || pmd_large(pmd))
1382 return 0;
1383 ptep = pte_offset_map(pmdp, addr);
1384 pte = *ptep;
1385 pte_unmap(ptep);
1386 if (!is_swap_pte(pte))
1387 return 0;
1388 entry = pte_to_swp_entry(pte);
1389 return is_hwpoison_entry(entry);
1390}
1391EXPORT_SYMBOL_GPL(is_hwpoison_address);
1392