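// SPDX-License-Identifier: GPL-2.0
/*
 * Implement the AER root port service driver. The driver registers an IRQ
 * handler. When a root port triggers an AER interrupt, the IRQ handler
 * collects root port status and schedules work.
 */
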
#define pr_fmt(fmt) "AER: " fmt
#define dev_fmt pr_fmt

#include <linux/bitops.h>
#include <linux/cper.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <acpi/apei.h>
#include <ras/ras_event.h>

#include "../pci.h"
#include "portdrv.h"

#define AER_ERROR_SOURCES_MAX		128

#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS	27	/* as per PCI_ERR_UNCOR_STATUS */

struct aer_err_source {
	unsigned int status;
	unsigned int id;
};

struct aer_rpc {
	struct pci_dev *rpd;		/* Root Port device */
	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};

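/* AER stats for the device */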
struct aer_stats {

	/*
	 * Fields for all AER capable devices. They indicate the errors
	 * "as seen by this device". Note that this may mean that if an
	 * Endpoint is causing problems, the AER counters may increment
	 * at its link partner (e.g. Root Port) because the errors will be
	 * "seen" by the link partner and not the problematic Endpoint
	 * itself (which may report all counters as 0 as it never saw any
	 * problems).
	 */
	/* Counters for different type of correctable errors */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Counters for different type of fatal uncorrectable errors */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Counters for different type of nonfatal uncorrectable errors */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Total number of ERR_COR sent by this device */
	u64 dev_total_cor_errs;
	/* Total number of ERR_FATAL sent by this device */
	u64 dev_total_fatal_errs;
	/* Total number of ERR_NONFATAL sent by this device */
	u64 dev_total_nonfatal_errs;

	/*
	 * Fields for Root Ports & Root Complex Event Collectors only; these
	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
	 * messages received by the Root Port / Event Collector, INCLUDING the
	 * ones that are generated internally (by the Root Port itself)
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;
};

#define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
					PCI_ERR_UNC_ECRC|		\
					PCI_ERR_UNC_UNSUP|		\
					PCI_ERR_UNC_COMP_ABORT|		\
					PCI_ERR_UNC_UNX_COMP|		\
					PCI_ERR_UNC_MALF_TLP)

#define SYSTEM_ERROR_INTR_ON_MESG_MASK	(PCI_EXP_RTCTL_SECEE|	\
					PCI_EXP_RTCTL_SENFEE|	\
					PCI_EXP_RTCTL_SEFEE)
#define ROOT_PORT_INTR_ON_MESG_MASK	(PCI_ERR_ROOT_CMD_COR_EN|	\
					PCI_ERR_ROOT_CMD_NONFATAL_EN|	\
					PCI_ERR_ROOT_CMD_FATAL_EN)
#define ERR_COR_ID(d)			(d & 0xffff)
#define ERR_UNCOR_ID(d)			(d >> 16)

static int pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);

void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}

bool pci_aer_available(void)
{
	return !pcie_aer_disable && pci_msi_enabled();
}

#ifdef CONFIG_PCIE_ECRC

#define ECRC_POLICY_DEFAULT 0		/* ECRC set by BIOS */
#define ECRC_POLICY_OFF     1		/* ECRC off for performance */
#define ECRC_POLICY_ON      2		/* ECRC on for data integrity */

static int ecrc_policy = ECRC_POLICY_DEFAULT;

static const char * const ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};

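/**
 * enable_ecrc_checking - enable PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */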
static int enable_ecrc_checking(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;

	if (!aer)
		return -ENODEV;

	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
	if (reg32 & PCI_ERR_CAP_ECRC_GENC)
		reg32 |= PCI_ERR_CAP_ECRC_GENE;
	if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
		reg32 |= PCI_ERR_CAP_ECRC_CHKE;
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);

	return 0;
}

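/**
 * disable_ecrc_checking - disables PCIe ECRC checking for a device
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */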
static int disable_ecrc_checking(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 reg32;

	if (!aer)
		return -ENODEV;

	pci_read_config_dword(dev, aer + PCI_ERR_CAP, &reg32);
	reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);

	return 0;
}

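/**
 * pcie_set_ecrc_checking - set/unset PCIe ECRC checking for a device based on global policy
 * @dev: the PCI device
 */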
void pcie_set_ecrc_checking(struct pci_dev *dev)
{
	switch (ecrc_policy) {
	case ECRC_POLICY_DEFAULT:
		return;
	case ECRC_POLICY_OFF:
		disable_ecrc_checking(dev);
		break;
	case ECRC_POLICY_ON:
		enable_ecrc_checking(dev);
		break;
	default:
		return;
	}
}

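/**
 * pcie_ecrc_get_policy - parse kernel command-line ecrc option
 * @str: ECRC policy from kernel command line to use
 */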
void pcie_ecrc_get_policy(char *str)
{
	int i;

	i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str);
	if (i < 0)
		return;

	ecrc_policy = i;
}
#endif	/* CONFIG_PCIE_ECRC */

#define PCI_EXP_AER_FLAGS	(PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
				 PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)

int pcie_aer_is_native(struct pci_dev *dev)
{
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);

	if (!dev->aer_cap)
		return 0;

	return pcie_ports_native || host->native_aer;
}

int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
	int rc;

	if (!pcie_aer_is_native(dev))
		return -EIO;

	rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
	return pcibios_err_to_errno(rc);
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);

int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
	int rc;

	if (!pcie_aer_is_native(dev))
		return -EIO;

	rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
	return pcibios_err_to_errno(rc);
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);

int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status, sev;

	if (!pcie_aer_is_native(dev))
		return -EIO;

	/* Clear status bits for ERR_NONFATAL errors only */
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
	status &= ~sev;
	if (status)
		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}
EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status);

void pci_aer_clear_fatal_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status, sev;

	if (!pcie_aer_is_native(dev))
		return;

	/* Clear status bits for ERR_FATAL errors only */
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
}

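/**
 * pci_aer_raw_clear_status - Clear AER error registers.
 * @dev: the PCI device
 *
 * Clearing AER error status registers unconditionally, regardless of
 * whether they're owned by firmware or the OS.
 *
 * Returns 0 on success, or negative on failure.
 */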
int pci_aer_raw_clear_status(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	u32 status;
	int port_type;

	if (!aer)
		return -EIO;

	port_type = pci_pcie_type(dev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
	    port_type == PCI_EXP_TYPE_RC_EC) {
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
	}

	pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
	pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status);

	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);

	return 0;
}

int pci_aer_clear_status(struct pci_dev *dev)
{
	if (!pcie_aer_is_native(dev))
		return -EIO;

	return pci_aer_raw_clear_status(dev);
}

void pci_save_aer_state(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	if (!aer)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++);
	pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++);
}

void pci_restore_aer_state(struct pci_dev *dev)
{
	int aer = dev->aer_cap;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	if (!aer)
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
	if (!save_state)
		return;

	cap = &save_state->cap.data[0];
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++);
	pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++);
	if (pcie_cap_has_rtctl(dev))
		pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++);
}

void pci_aer_init(struct pci_dev *dev)
{
	int n;

	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
	if (!dev->aer_cap)
		return;

	dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);

	/*
	 * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
	 * PCI_ERR_COR_MASK, and PCI_ERR_CAP.  Root Ports and Root Complex
	 * Event Collectors also implement PCI_ERR_ROOT_COMMAND (PCIe r5.0,
	 * sec 7.8.4.9).
	 */
	n = pcie_cap_has_rtctl(dev) ? 5 : 4;
	pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);

	pci_aer_clear_status(dev);
}

void pci_aer_exit(struct pci_dev *dev)
{
	kfree(dev->aer_stats);
	dev->aer_stats = NULL;
}

#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

#define AER_AGENT_REQUESTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
					0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	((t == AER_CORRECTABLE) ?	\
					0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	((t == AER_CORRECTABLE) ?	\
					(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

#define AER_GET_AGENT(t, e)						\
	((e & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	(e & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	(e & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)

#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

#define AER_PHYSICAL_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
					PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) ((t == AER_CORRECTABLE) ?	\
					(PCI_ERR_COR_BAD_TLP|		\
					PCI_ERR_COR_BAD_DLLP|		\
					PCI_ERR_COR_REP_ROLL|		\
					PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

#define AER_GET_LAYER_ERROR(t, e)					\
	((e & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	(e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)

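/*
 * AER error strings
 */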
static const char *aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};

static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

static const char *aer_correctable_error_string[] = {
	"RxErr",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"BadTLP",			/* Bit Position 6	*/
	"BadDLLP",			/* Bit Position 7	*/
	"Rollover",			/* Bit Position 8	*/
	NULL,
	NULL,
	NULL,
	"Timeout",			/* Bit Position 12	*/
	"NonFatalErr",			/* Bit Position 13	*/
	"CorrIntErr",			/* Bit Position 14	*/
	"HeaderOF",			/* Bit Position 15	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
};

static const char *aer_uncorrectable_error_string[] = {
	"Undefined",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	"DLP",				/* Bit Position 4	*/
	"SDES",				/* Bit Position 5	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"TLP",				/* Bit Position 12	*/
	"FCP",				/* Bit Position 13	*/
	"CmpltTO",			/* Bit Position 14	*/
	"CmpltAbrt",			/* Bit Position 15	*/
	"UnxCmplt",			/* Bit Position 16	*/
	"RxOF",				/* Bit Position 17	*/
	"MalfTLP",			/* Bit Position 18	*/
	"ECRC",				/* Bit Position 19	*/
	"UnsupReq",			/* Bit Position 20	*/
	"ACSViol",			/* Bit Position 21	*/
	"UncorrIntErr",			/* Bit Position 22	*/
	"BlockedTLP",			/* Bit Position 23	*/
	"AtomicOpBlocked",		/* Bit Position 24	*/
	"TLPBlockedErr",		/* Bit Position 25	*/
	"PoisonTLPBlocked",		/* Bit Position 26	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
};

static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};

#define aer_stats_dev_attr(name, stats_array, strings_array,		\
			   total_string, total_field)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	unsigned int i;							\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	u64 *stats = pdev->aer_stats->stats_array;			\
	size_t len = 0;							\
									\
	for (i = 0; i < ARRAY_SIZE(strings_array); i++) {		\
		if (strings_array[i])					\
			len += sysfs_emit_at(buf, len, "%s %llu\n",	\
					     strings_array[i],		\
					     stats[i]);			\
		else if (stats[i])					\
			len += sysfs_emit_at(buf, len,			\
					     #stats_array "_bit[%d] %llu\n",\
					     i, stats[i]);		\
	}								\
	len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string,	\
			     pdev->aer_stats->total_field);		\
	return len;							\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
		   aer_correctable_error_string, "ERR_COR",
		   dev_total_cor_errs);
aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
		   aer_uncorrectable_error_string, "ERR_FATAL",
		   dev_total_fatal_errs);
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
		   aer_uncorrectable_error_string, "ERR_NONFATAL",
		   dev_total_nonfatal_errs);

#define aer_stats_rootport_attr(name, field)				\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		    char *buf)						\
{									\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field);	\
}									\
static DEVICE_ATTR_RO(name)

aer_stats_rootport_attr(aer_rootport_total_err_cor,
			rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			rootport_total_nonfatal_errs);

static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};

static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
					   struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (!pdev->aer_stats)
		return 0;

	if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
	     a == &dev_attr_aer_rootport_total_err_fatal.attr ||
	     a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
	    ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
	     (pci_pcie_type(pdev) != PCI_EXP_TYPE_RC_EC)))
		return 0;

	return a->mode;
}

const struct attribute_group aer_stats_attr_group = {
	.attrs = aer_stats_attrs,
	.is_visible = aer_stats_attrs_are_visible,
};

static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
				   struct aer_err_info *info)
{
	unsigned long status = info->status & ~info->mask;
	int i, max = -1;
	u64 *counter = NULL;
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	switch (info->severity) {
	case AER_CORRECTABLE:
		aer_stats->dev_total_cor_errs++;
		counter = &aer_stats->dev_cor_errs[0];
		max = AER_MAX_TYPEOF_COR_ERRS;
		break;
	case AER_NONFATAL:
		aer_stats->dev_total_nonfatal_errs++;
		counter = &aer_stats->dev_nonfatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	case AER_FATAL:
		aer_stats->dev_total_fatal_errs++;
		counter = &aer_stats->dev_fatal_errs[0];
		max = AER_MAX_TYPEOF_UNCOR_ERRS;
		break;
	}

	for_each_set_bit(i, &status, max)
		counter[i]++;
}

static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
					struct aer_err_source *e_src)
{
	struct aer_stats *aer_stats = pdev->aer_stats;

	if (!aer_stats)
		return;

	if (e_src->status & PCI_ERR_ROOT_COR_RCV)
		aer_stats->rootport_total_cor_errs++;

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			aer_stats->rootport_total_fatal_errs++;
		else
			aer_stats->rootport_total_nonfatal_errs++;
	}
}

static void __print_tlp_header(struct pci_dev *dev,
			       struct aer_header_log_regs *t)
{
	pci_err(dev, "  TLP Header: %08x %08x %08x %08x\n",
		t->dw0, t->dw1, t->dw2, t->dw3);
}

static void __aer_print_error(struct pci_dev *dev,
			      struct aer_err_info *info)
{
	const char **strings;
	unsigned long status = info->status & ~info->mask;
	const char *level, *errmsg;
	int i;

	if (info->severity == AER_CORRECTABLE) {
		strings = aer_correctable_error_string;
		level = KERN_WARNING;
	} else {
		strings = aer_uncorrectable_error_string;
		level = KERN_ERR;
	}

	for_each_set_bit(i, &status, 32) {
		errmsg = strings[i];
		if (!errmsg)
			errmsg = "Unknown Error Bit";

		pci_printk(level, dev, "   [%2d] %-22s%s\n", i, errmsg,
			   info->first_error == i ? " (First)" : "");
	}
	pci_dev_aer_stats_incr(dev, info);
}

void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
	int layer, agent;
	int id = ((dev->bus->number << 8) | dev->devfn);
	const char *level;

	if (!info->status) {
		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
			aer_error_severity_string[info->severity]);
		goto out;
	}

	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
	agent = AER_GET_AGENT(info->severity, info->status);

	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;

	pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
		   aer_error_severity_string[info->severity],
		   aer_error_layer[layer], aer_agent_string[agent]);

	pci_printk(level, dev, "   device [%04x:%04x] error status/mask=%08x/%08x\n",
		   dev->vendor, dev->device, info->status, info->mask);

	__aer_print_error(dev, info);

	if (info->tlp_header_valid)
		__print_tlp_header(dev, &info->tlp);

out:
	if (info->id && info->error_dev_num > 1 && info->id == id)
		pci_err(dev, "  Error of this Agent is reported first\n");

	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
			info->severity, info->tlp_header_valid, &info->tlp);
}

static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
{
	u8 bus = info->id >> 8;
	u8 devfn = info->id & 0xff;

	pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
		 info->multi_error_valid ? "Multiple " : "",
		 aer_error_severity_string[info->severity],
		 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
		 PCI_FUNC(devfn));
}

#ifdef CONFIG_ACPI_APEI_PCIEAER
int cper_severity_to_aer(int cper_severity)
{
	switch (cper_severity) {
	case CPER_SEV_RECOVERABLE:
		return AER_NONFATAL;
	case CPER_SEV_FATAL:
		return AER_FATAL;
	default:
		return AER_CORRECTABLE;
	}
}
EXPORT_SYMBOL_GPL(cper_severity_to_aer);

void cper_print_aer(struct pci_dev *dev, int aer_severity,
		    struct aer_capability_regs *aer)
{
	int layer, agent, tlp_header_valid = 0;
	u32 status, mask;
	struct aer_err_info info;

	if (aer_severity == AER_CORRECTABLE) {
		status = aer->cor_status;
		mask = aer->cor_mask;
	} else {
		status = aer->uncor_status;
		mask = aer->uncor_mask;
		tlp_header_valid = status & AER_LOG_TLP_MASKS;
	}

	layer = AER_GET_LAYER_ERROR(aer_severity, status);
	agent = AER_GET_AGENT(aer_severity, status);

	memset(&info, 0, sizeof(info));
	info.severity = aer_severity;
	info.status = status;
	info.mask = mask;
	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);

	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
	__aer_print_error(dev, &info);
	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
		aer_error_layer[layer], aer_agent_string[agent]);

	if (aer_severity != AER_CORRECTABLE)
		pci_err(dev, "aer_uncor_severity: 0x%08x\n",
			aer->uncor_severity);

	if (tlp_header_valid)
		__print_tlp_header(dev, &aer->header_log);

	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
			aer_severity, tlp_header_valid, &aer->header_log);
}
#endif
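/**
 * add_error_device - list device to be handled
 * @e_info: pointer to error info
 * @dev: pointer to pci_dev to be added
 */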
static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
	if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
		e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
		e_info->error_dev_num++;
		return 0;
	}
	return -ENOSPC;
}

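/**
 * is_error_source - check whether the device is source of reported error
 * @dev: pointer to pci_dev to be checked
 * @e_info: pointer to reported error info
 */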
static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
{
	int aer = dev->aer_cap;
	u32 status, mask;
	u16 reg16;

	/*
	 * When bus id is equal to 0, it might be a bad id
	 * reported by root port.
	 */
	if ((PCI_BUS_NUM(e_info->id) != 0) &&
	    !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
		/* Device ID match? */
		if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
			return true;

		/* Continue id comparing if there is no multiple error */
		if (!e_info->multi_error_valid)
			return false;
	}

	/*
	 * When either
	 *      1) bus id is equal to 0. Some ports might lose the bus
	 *              id of error source id;
	 *      2) bus flag PCI_BUS_FLAGS_NO_AERSID is set
	 *      3) There are multiple errors and prior ID comparing fails;
	 * We check AER status registers to find possible reporter.
	 */
	if (atomic_read(&dev->enable_cnt) == 0)
		return false;

	/* Check if AER is enabled */
	pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &reg16);
	if (!(reg16 & PCI_EXP_AER_FLAGS))
		return false;

	if (!aer)
		return false;

	/* Check if error is recorded */
	if (e_info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
	} else {
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
	}
	if (status & ~mask)
		return true;

	return false;
}

static int find_device_iter(struct pci_dev *dev, void *data)
{
	struct aer_err_info *e_info = (struct aer_err_info *)data;

	if (is_error_source(dev, e_info)) {
		/* List this device */
		if (add_error_device(e_info, dev)) {
			/* We cannot handle more... Stop iteration */
			/* TODO: Should print error message here? */
			return 1;
		}

		/* If there is only a single error, stop iteration */
		if (!e_info->multi_error_valid)
			return 1;
	}
	return 0;
}

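/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: including detailed error information such as ID
 *
 * Return true if found.
 *
 * Caller of this function must set id, severity, and multi_error_valid of
 * struct aer_err_info pointed by @e_info properly.  This function must fill
 * e_info->error_dev_num and e_info->dev[], based on the given information.
 */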
static bool find_source_device(struct pci_dev *parent,
			       struct aer_err_info *e_info)
{
	struct pci_dev *dev = parent;
	int result;

	/* Must reset in this function */
	e_info->error_dev_num = 0;

	/* Is Root Port an agent that sends error message? */
	result = find_device_iter(dev, e_info);
	if (result)
		return true;

	if (pci_pcie_type(parent) == PCI_EXP_TYPE_RC_EC)
		pcie_walk_rcec(parent, find_device_iter, e_info);
	else
		pci_walk_bus(parent->subordinate, find_device_iter, e_info);

	if (!e_info->error_dev_num) {
		pci_info(parent, "can't find device of ID%04x\n", e_info->id);
		return false;
	}
	return true;
}

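/**
 * handle_error_source - handle logging error into an event log
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */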
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
	int aer = dev->aer_cap;

	if (info->severity == AER_CORRECTABLE) {
		/*
		 * Correctable error does not need software intervention.
		 * No need to go through error recovery process.
		 */
		if (aer)
			pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
					info->status);
		if (pcie_aer_is_native(dev))
			pcie_clear_device_status(dev);
	} else if (info->severity == AER_NONFATAL)
		pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
	else if (info->severity == AER_FATAL)
		pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
	pci_dev_put(dev);
}

#ifdef CONFIG_ACPI_APEI_PCIEAER

#define AER_RECOVER_RING_ORDER		4
#define AER_RECOVER_RING_SIZE		(1 << AER_RECOVER_RING_ORDER)

struct aer_recover_entry {
	u8	bus;
	u8	devfn;
	u16	domain;
	int	severity;
	struct aer_capability_regs *regs;
};

static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);

static void aer_recover_work_func(struct work_struct *work)
{
	struct aer_recover_entry entry;
	struct pci_dev *pdev;

	while (kfifo_get(&aer_recover_ring, &entry)) {
		pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
						   entry.devfn);
		if (!pdev) {
			pr_err("no pci_dev for %04x:%02x:%02x.%x\n",
			       entry.domain, entry.bus,
			       PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
			continue;
		}
		cper_print_aer(pdev, entry.severity, entry.regs);
		if (entry.severity == AER_NONFATAL)
			pcie_do_recovery(pdev, pci_channel_io_normal,
					 aer_root_reset);
		else if (entry.severity == AER_FATAL)
			pcie_do_recovery(pdev, pci_channel_io_frozen,
					 aer_root_reset);
		pci_dev_put(pdev);
	}
}

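/*
 * Mutual exclusion for writers of aer_recover_ring; the reader side doesn't
 * need a lock because there is only one reader, and no lock is needed
 * between reader and writer.
 */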
static DEFINE_SPINLOCK(aer_recover_ring_lock);
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);

void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
		       int severity, struct aer_capability_regs *aer_regs)
{
	struct aer_recover_entry entry = {
		.bus		= bus,
		.devfn		= devfn,
		.domain		= domain,
		.severity	= severity,
		.regs		= aer_regs,
	};

	if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
				&aer_recover_ring_lock))
		schedule_work(&aer_recover_work);
	else
		pr_err("buffer overflow in recovery for %04x:%02x:%02x.%x\n",
		       domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
}
EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif

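/**
 * aer_get_device_error_info - read error status from dev and store it to info
 * @dev: pointer to the device expected to have an error record
 * @info: pointer to structure to store the error record
 *
 * Return 1 on success, 0 on error.
 *
 * Note that @info is reused among all error devices. Clear fields properly.
 */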
int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
	int type = pci_pcie_type(dev);
	int aer = dev->aer_cap;
	int temp;

	/* Must reset in this function */
	info->status = 0;
	info->tlp_header_valid = 0;

	/* The device might not support AER */
	if (!aer)
		return 0;

	if (info->severity == AER_CORRECTABLE) {
		pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
			&info->status);
		pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK,
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;
	} else if (type == PCI_EXP_TYPE_ROOT_PORT ||
		   type == PCI_EXP_TYPE_RC_EC ||
		   type == PCI_EXP_TYPE_DOWNSTREAM ||
		   info->severity == AER_NONFATAL) {

		/* Link is still healthy for IO reads */
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
			&info->status);
		pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
			&info->mask);
		if (!(info->status & ~info->mask))
			return 0;

		/* Get First Error Pointer */
		pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
		info->first_error = PCI_ERR_CAP_FEP(temp);

		if (info->status & AER_LOG_TLP_MASKS) {
			info->tlp_header_valid = 1;
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
			pci_read_config_dword(dev,
				aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
		}
	}

	return 1;
}

static inline void aer_process_err_devices(struct aer_err_info *e_info)
{
	int i;

	/* Report all before handle them, not to lost records by reset etc. */
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			aer_print_error(e_info->dev[i], e_info);
	}
	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
		if (aer_get_device_error_info(e_info->dev[i], e_info))
			handle_error_source(e_info->dev[i], e_info);
	}
}

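/**
 * aer_isr_one_error - consume an error detected by Root Port
 * @rpc: pointer to the Root Port which holds an error
 * @e_src: pointer to an error source
 */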
static void aer_isr_one_error(struct aer_rpc *rpc,
			      struct aer_err_source *e_src)
{
	struct pci_dev *pdev = rpc->rpd;
	struct aer_err_info e_info;

	pci_rootport_aer_stats_incr(pdev, e_src);

	/*
	 * There is a possibility that both correctable error and
	 * uncorrectable error being logged. Report correctable error first.
	 */
	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
		e_info.id = ERR_COR_ID(e_src->id);
		e_info.severity = AER_CORRECTABLE;

		if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
			e_info.multi_error_valid = 1;
		else
			e_info.multi_error_valid = 0;
		aer_print_port_info(pdev, &e_info);

		if (find_source_device(pdev, &e_info))
			aer_process_err_devices(&e_info);
	}

	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
		e_info.id = ERR_UNCOR_ID(e_src->id);

		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
			e_info.severity = AER_FATAL;
		else
			e_info.severity = AER_NONFATAL;

		if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
			e_info.multi_error_valid = 1;
		else
			e_info.multi_error_valid = 0;

		aer_print_port_info(pdev, &e_info);

		if (find_source_device(pdev, &e_info))
			aer_process_err_devices(&e_info);
	}
}

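/**
 * aer_isr - consume errors detected by Root Port
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked from the threaded IRQ handler when the Root Port records a new
 * detected error.
 */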
static irqreturn_t aer_isr(int irq, void *context)
{
	struct pcie_device *dev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(dev);
	struct aer_err_source e_src;

	if (kfifo_is_empty(&rpc->aer_fifo))
		return IRQ_NONE;

	while (kfifo_get(&rpc->aer_fifo, &e_src))
		aer_isr_one_error(rpc, &e_src);
	return IRQ_HANDLED;
}

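/**
 * aer_irq - Root Port's ISR
 * @irq: IRQ assigned to Root Port
 * @context: pointer to Root Port data structure
 *
 * Invoked when Root Port detects AER messages.
 */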
static irqreturn_t aer_irq(int irq, void *context)
{
	struct pcie_device *pdev = (struct pcie_device *)context;
	struct aer_rpc *rpc = get_service_data(pdev);
	struct pci_dev *rp = rpc->rpd;
	int aer = rp->aer_cap;
	struct aer_err_source e_src = {};

	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status);
	if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV)))
		return IRQ_NONE;

	pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
	pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status);

	if (!kfifo_put(&rpc->aer_fifo, e_src))
		return IRQ_HANDLED;

	return IRQ_WAKE_THREAD;
}

static int set_device_error_reporting(struct pci_dev *dev, void *data)
{
	bool enable = *((bool *)data);
	int type = pci_pcie_type(dev);

	if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
	    (type == PCI_EXP_TYPE_RC_EC) ||
	    (type == PCI_EXP_TYPE_UPSTREAM) ||
	    (type == PCI_EXP_TYPE_DOWNSTREAM)) {
		if (enable)
			pci_enable_pcie_error_reporting(dev);
		else
			pci_disable_pcie_error_reporting(dev);
	}

	if (enable)
		pcie_set_ecrc_checking(dev);

	return 0;
}

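/**
 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
 * @dev: pointer to root port's pci_dev data structure
 * @enable: true = enable error reporting, false = disable error reporting.
 */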
static void set_downstream_devices_error_reporting(struct pci_dev *dev,
						   bool enable)
{
	set_device_error_reporting(dev, &enable);

	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
		pcie_walk_rcec(dev, set_device_error_reporting, &enable);
	else if (dev->subordinate)
		pci_walk_bus(dev->subordinate, set_device_error_reporting,
			     &enable);
}

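/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus loads AER service driver.
 */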
static void aer_enable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer = pdev->aer_cap;
	u16 reg16;
	u32 reg32;

	/* Clear PCIe Capability's Device Status */
	pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, &reg16);
	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);

	/* Disable system error generation in response to error messages */
	pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
				   SYSTEM_ERROR_INTR_ON_MESG_MASK);

	/* Clear error status */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
	pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32);
	pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);

	/*
	 * Enable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, true);

	/* Enable Root Port's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
}

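/**
 * aer_disable_rootport - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIe bus unloads AER service driver.
 */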
static void aer_disable_rootport(struct aer_rpc *rpc)
{
	struct pci_dev *pdev = rpc->rpd;
	int aer = pdev->aer_cap;
	u32 reg32;

	/*
	 * Disable error reporting for the root port device and downstream port
	 * devices.
	 */
	set_downstream_devices_error_reporting(pdev, false);

	/* Disable Root's interrupt in response to error messages */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, &reg32);
	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);

	/* Clear Root's error status reg */
	pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, &reg32);
	pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
}

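/**
 * aer_remove - clean up resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus unloads or AER probe fails.
 */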
static void aer_remove(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_disable_rootport(rpc);
}

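/**
 * aer_probe - initialize resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Invoked when PCI Express bus loads AER service driver.
 */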
static int aer_probe(struct pcie_device *dev)
{
	int status;
	struct aer_rpc *rpc;
	struct device *device = &dev->device;
	struct pci_dev *port = dev->port;

	/* Limit to Root Ports or Root Complex Event Collectors */
	if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
	    (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
		return -ENODEV;

	rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
	if (!rpc)
		return -ENOMEM;

	rpc->rpd = port;
	INIT_KFIFO(rpc->aer_fifo);
	set_service_data(dev, rpc);

	status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
					   IRQF_SHARED, "aerdrv", dev);
	if (status) {
		pci_err(port, "request AER IRQ %d failed\n", dev->irq);
		return status;
	}

	aer_enable_rootport(rpc);
	pci_info(port, "enabled with IRQ %d\n", dev->irq);
	return 0;
}

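/**
 * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
 * @dev: pointer to Root Port, RCEC, or RCiEP
 *
 * Invoked by Port Bus driver when performing reset.
 */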
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
{
	int type = pci_pcie_type(dev);
	struct pci_dev *root;
	int aer;
	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
	u32 reg32;
	int rc;

	/*
	 * Only Root Ports and RCECs have AER Root Command and Root Status
	 * registers.  If "dev" is an RCiEP, the relevant registers are in
	 * the RCEC.
	 */
	if (type == PCI_EXP_TYPE_RC_END)
		root = dev->rcec;
	else
		root = pcie_find_root_port(dev);

	/*
	 * If the platform retained control of AER, an RCiEP may not have
	 * an RCEC visible to us, so dev->rcec ("root") may be NULL.  In
	 * that case, firmware is responsible for these registers.
	 */
	aer = root ? root->aer_cap : 0;

	if ((host->native_aer || pcie_ports_native) && aer) {
		/* Disable Root's interrupt in response to error messages */
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
		reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
	}

	if (type == PCI_EXP_TYPE_RC_EC || type == PCI_EXP_TYPE_RC_END) {
		if (pcie_has_flr(dev)) {
			rc = pcie_flr(dev);
			pci_info(dev, "has been reset (%d)\n", rc);
		} else {
			pci_info(dev, "not reset (no FLR support)\n");
			rc = -ENOTTY;
		}
	} else {
		rc = pci_bus_error_reset(dev);
		pci_info(dev, "%s Port link has been reset (%d)\n",
			pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc);
	}

	if ((host->native_aer || pcie_ports_native) && aer) {
		/* Clear Root Error Status */
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_STATUS, &reg32);
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_STATUS, reg32);

		/* Enable Root Port's interrupt in response to error messages */
		pci_read_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, &reg32);
		reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
		pci_write_config_dword(root, aer + PCI_ERR_ROOT_COMMAND, reg32);
	}

	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

static struct pcie_port_service_driver aerdriver = {
	.name		= "aer",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_AER,

	.probe		= aer_probe,
	.remove		= aer_remove,
};

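/**
 * pcie_aer_init - register AER root service driver
 *
 * Invoked when AER root service driver is loaded.
 */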
int __init pcie_aer_init(void)
{
	if (!pci_aer_available())
		return -ENXIO;
	return pcie_port_service_register(&aerdriver);
}