1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/irq.h>
28#include <linux/pci.h>
29#include <asm/eeh.h>
30#include <asm/eeh_event.h>
31#include <asm/ppc-pci.h>
32#include <asm/pci-bridge.h>
33#include <asm/prom.h>
34#include <asm/rtas.h>
35
36
37static inline const char * pcid_name (struct pci_dev *pdev)
38{
39 if (pdev && pdev->dev.driver)
40 return pdev->dev.driver->name;
41 return "";
42}
43
#if 0
/*
 * Debug helper (compiled out): print the EEH fields of a pci_dn and then,
 * recursively, those of every child device node.  Each nesting level is
 * printed with three more leading pad strings than its parent.
 */
static void print_device_node_tree(struct pci_dn *pdn, int dent)
{
	struct device_node *child;
	int col;

	if (!pdn)
		return;

	for (col = 0; col < dent; col++)
		printk(" ");
	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
	       pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
	       pdn->eeh_pe_config_addr, pdn->node->full_name);

	for (child = pdn->node->child; child; child = child->sibling)
		print_device_node_tree(PCI_DN(child), dent + 3);
}
#endif
65
66
67
68
69static void eeh_disable_irq(struct pci_dev *dev)
70{
71 struct device_node *dn = pci_device_to_OF_node(dev);
72
73
74
75
76
77 if (dev->msi_enabled || dev->msix_enabled)
78 return;
79
80 if (!irq_has_action(dev->irq))
81 return;
82
83 PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
84 disable_irq_nosync(dev->irq);
85}
86
87
88
89
90static void eeh_enable_irq(struct pci_dev *dev)
91{
92 struct device_node *dn = pci_device_to_OF_node(dev);
93
94 if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
95 PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
96 enable_irq(dev->irq);
97 }
98}
99
100
101
102
103
104
105
106
107
108
109static int eeh_report_error(struct pci_dev *dev, void *userdata)
110{
111 enum pci_ers_result rc, *res = userdata;
112 struct pci_driver *driver = dev->driver;
113
114 dev->error_state = pci_channel_io_frozen;
115
116 if (!driver)
117 return 0;
118
119 eeh_disable_irq(dev);
120
121 if (!driver->err_handler ||
122 !driver->err_handler->error_detected)
123 return 0;
124
125 rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
126
127
128 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
129 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
130
131 return 0;
132}
133
134
135
136
137
138
139
140
141
142static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
143{
144 enum pci_ers_result rc, *res = userdata;
145 struct pci_driver *driver = dev->driver;
146
147 if (!driver ||
148 !driver->err_handler ||
149 !driver->err_handler->mmio_enabled)
150 return 0;
151
152 rc = driver->err_handler->mmio_enabled (dev);
153
154
155 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
156 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
157
158 return 0;
159}
160
161
162
163
164
165static int eeh_report_reset(struct pci_dev *dev, void *userdata)
166{
167 enum pci_ers_result rc, *res = userdata;
168 struct pci_driver *driver = dev->driver;
169
170 if (!driver)
171 return 0;
172
173 dev->error_state = pci_channel_io_normal;
174
175 eeh_enable_irq(dev);
176
177 if (!driver->err_handler ||
178 !driver->err_handler->slot_reset)
179 return 0;
180
181 rc = driver->err_handler->slot_reset(dev);
182 if ((*res == PCI_ERS_RESULT_NONE) ||
183 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
184 if (*res == PCI_ERS_RESULT_DISCONNECT &&
185 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
186
187 return 0;
188}
189
190
191
192
193
194static int eeh_report_resume(struct pci_dev *dev, void *userdata)
195{
196 struct pci_driver *driver = dev->driver;
197
198 dev->error_state = pci_channel_io_normal;
199
200 if (!driver)
201 return 0;
202
203 eeh_enable_irq(dev);
204
205 if (!driver->err_handler ||
206 !driver->err_handler->resume)
207 return 0;
208
209 driver->err_handler->resume(dev);
210
211 return 0;
212}
213
214
215
216
217
218
219
220
221static int eeh_report_failure(struct pci_dev *dev, void *userdata)
222{
223 struct pci_driver *driver = dev->driver;
224
225 dev->error_state = pci_channel_io_perm_failure;
226
227 if (!driver)
228 return 0;
229
230 eeh_disable_irq(dev);
231
232 if (!driver->err_handler ||
233 !driver->err_handler->error_detected)
234 return 0;
235
236 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
237
238 return 0;
239}
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
272{
273 struct device_node *dn;
274 int cnt, rc;
275
276
277 cnt = pe_dn->eeh_freeze_count;
278
279 if (bus)
280 pcibios_remove_pci_devices(bus);
281
282
283
284
285 rc = rtas_set_slot_reset(pe_dn);
286 if (rc)
287 return rc;
288
289
290 dn = pe_dn->node;
291 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
292 dn = dn->parent->child;
293
294 while (dn) {
295 struct pci_dn *ppe = PCI_DN(dn);
296
297 if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
298 rtas_configure_bridge(ppe);
299 eeh_restore_bars(ppe);
300 }
301 dn = dn->sibling;
302 }
303
304
305
306
307
308
309
310 if (bus) {
311 ssleep (5);
312 pcibios_add_pci_devices(bus);
313 }
314 pe_dn->eeh_freeze_count = cnt;
315
316 return 0;
317}
318
319
320
321
322#define MAX_WAIT_FOR_RECOVERY 150
323
324struct pci_dn * handle_eeh_events (struct eeh_event *event)
325{
326 struct device_node *frozen_dn;
327 struct pci_dn *frozen_pdn;
328 struct pci_bus *frozen_bus;
329 int rc = 0;
330 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
331 const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
332
333 frozen_dn = find_device_pe(event->dn);
334 if (!frozen_dn) {
335
336 location = of_get_property(event->dn, "ibm,loc-code", NULL);
337 location = location ? location : "unknown";
338 printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
339 "for location=%s pci addr=%s\n",
340 location, eeh_pci_name(event->dev));
341 return NULL;
342 }
343
344 frozen_bus = pcibios_find_pci_bus(frozen_dn);
345 location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
346 location = location ? location : "unknown";
347
348
349
350
351
352
353
354 if (!frozen_bus)
355 frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);
356
357 if (!frozen_bus) {
358 printk(KERN_ERR "EEH: Cannot find PCI bus "
359 "for location=%s dn=%s\n",
360 location, frozen_dn->full_name);
361 return NULL;
362 }
363
364 frozen_pdn = PCI_DN(frozen_dn);
365 frozen_pdn->eeh_freeze_count++;
366
367 pci_str = eeh_pci_name(event->dev);
368 drv_str = pcid_name(event->dev);
369
370 if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
371 goto excess_failures;
372
373 printk(KERN_WARNING
374 "EEH: This PCI device has failed %d times in the last hour:\n",
375 frozen_pdn->eeh_freeze_count);
376
377 if (frozen_pdn->pcidev) {
378 bus_pci_str = pci_name(frozen_pdn->pcidev);
379 bus_drv_str = pcid_name(frozen_pdn->pcidev);
380 printk(KERN_WARNING
381 "EEH: Bus location=%s driver=%s pci addr=%s\n",
382 location, bus_drv_str, bus_pci_str);
383 }
384
385 printk(KERN_WARNING
386 "EEH: Device location=%s driver=%s pci addr=%s\n",
387 location, drv_str, pci_str);
388
389
390
391
392
393
394
395 pci_walk_bus(frozen_bus, eeh_report_error, &result);
396
397
398
399 rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
400 if (rc < 0) {
401 printk(KERN_WARNING "EEH: Permanent failure\n");
402 goto hard_fail;
403 }
404
405
406
407
408
409 eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE);
410
411
412
413
414
415 if (result == PCI_ERS_RESULT_NONE) {
416 rc = eeh_reset_device(frozen_pdn, frozen_bus);
417 if (rc) {
418 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
419 goto hard_fail;
420 }
421 }
422
423
424 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
425 rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
426
427 if (rc < 0)
428 goto hard_fail;
429 if (rc) {
430 result = PCI_ERS_RESULT_NEED_RESET;
431 } else {
432 result = PCI_ERS_RESULT_NONE;
433 pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
434 }
435 }
436
437
438 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
439 rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
440
441 if (rc < 0)
442 goto hard_fail;
443 if (rc)
444 result = PCI_ERS_RESULT_NEED_RESET;
445 else
446 result = PCI_ERS_RESULT_RECOVERED;
447 }
448
449
450 if (result == PCI_ERS_RESULT_DISCONNECT) {
451 printk(KERN_WARNING "EEH: Device driver gave up\n");
452 goto hard_fail;
453 }
454
455
456 if (result == PCI_ERS_RESULT_NEED_RESET) {
457 rc = eeh_reset_device(frozen_pdn, NULL);
458 if (rc) {
459 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
460 goto hard_fail;
461 }
462 result = PCI_ERS_RESULT_NONE;
463 pci_walk_bus(frozen_bus, eeh_report_reset, &result);
464 }
465
466
467 if ((result != PCI_ERS_RESULT_RECOVERED) &&
468 (result != PCI_ERS_RESULT_NONE)) {
469 printk(KERN_WARNING "EEH: Not recovered\n");
470 goto hard_fail;
471 }
472
473
474 pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
475
476 return frozen_pdn;
477
478excess_failures:
479
480
481
482
483
484 printk(KERN_ERR
485 "EEH: PCI device at location=%s driver=%s pci addr=%s\n"
486 "has failed %d times in the last hour "
487 "and has been permanently disabled.\n"
488 "Please try reseating this device or replacing it.\n",
489 location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
490 goto perm_error;
491
492hard_fail:
493 printk(KERN_ERR
494 "EEH: Unable to recover from failure of PCI device "
495 "at location=%s driver=%s pci addr=%s\n"
496 "Please try reseating this device or replacing it.\n",
497 location, drv_str, pci_str);
498
499perm_error:
500 eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE);
501
502
503 pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
504
505
506 pcibios_remove_pci_devices(frozen_bus);
507
508 return NULL;
509}
510
511
512