1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/perf_event.h>
13#include <linux/string.h>
14#include <asm/reg.h>
15#include <asm/cputable.h>
16
17
18
19
/*
 * Bit fields of an event code as decoded in this file.  A field is
 * extracted with (event >> PM_xxx_SH) & PM_xxx_MSK; the two low-order
 * masks (BUSEVENT, PMCSEL) are applied to the event code directly.
 */
#define PM_PMC_SH	20	/* PMC number; 0 = no specific PMC requested */
#define PM_PMC_MSK	0xf
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)	/* PMC field, in place */
#define PM_UNIT_SH	16	/* unit supplying a bus event (PM_FPU ...) */
#define PM_UNIT_MSK	0xf
#define PM_BYTE_SH	12	/* byte number on the event bus */
#define PM_BYTE_MSK	7
#define PM_GRS_SH	8	/* value for the GRS mux select field */
#define PM_GRS_MSK	7
#define PM_BUSEVENT_MSK	0x80	/* set: unit/byte fields are meaningful */
#define PM_PMCSEL_MSK	0x7f	/* PMCSEL value for the chosen PMC */
31
32
/*
 * Values of the event code's unit field.  Units 0-3 are scanned for the
 * TTM0 select and units 4-7 for the TTM1 select in
 * power5p_compute_mmcr(); PM_ISU0_ALT is the TTM1 alias of PM_ISU0.
 */
#define PM_FPU		0
#define PM_ISU0		1
#define PM_IFU		2
#define PM_ISU1		3
#define PM_IDU		4
#define PM_ISU0_ALT	6
#define PM_GRS		7
#define PM_LSU0		8
#define PM_LSU1		0xc
#define PM_LASTUNIT	0xc	/* highest unit number accepted */
43
44
45
46
/*
 * Field positions in MMCR1, given as left-shift counts applied to an
 * unsigned long image of the register, together with the field masks.
 */
#define MMCR1_TTM0SEL_SH	62
#define MMCR1_TTM1SEL_SH	60
#define MMCR1_TTM2SEL_SH	58
#define MMCR1_TTM3SEL_SH	56
#define MMCR1_TTMSEL_MSK	3
#define MMCR1_TD_CP_DBG0SEL_SH	54
#define MMCR1_TD_CP_DBG1SEL_SH	52
#define MMCR1_TD_CP_DBG2SEL_SH	50
#define MMCR1_TD_CP_DBG3SEL_SH	48
#define MMCR1_GRS_L2SEL_SH	46
#define MMCR1_GRS_L2SEL_MSK	3
#define MMCR1_GRS_L3SEL_SH	44
#define MMCR1_GRS_L3SEL_MSK	3
#define MMCR1_GRS_MCSEL_SH	41
#define MMCR1_GRS_MCSEL_MSK	7
#define MMCR1_GRS_FABSEL_SH	39
#define MMCR1_GRS_FABSEL_MSK	3
#define MMCR1_PMC1_ADDER_SEL_SH	35
#define MMCR1_PMC2_ADDER_SEL_SH	34
#define MMCR1_PMC3_ADDER_SEL_SH	33
#define MMCR1_PMC4_ADDER_SEL_SH	32
#define MMCR1_PMC1SEL_SH	25
#define MMCR1_PMC2SEL_SH	17
#define MMCR1_PMC3SEL_SH	9
#define MMCR1_PMC4SEL_SH	1
/* Shift of the PMCSEL field for zero-based PMC index n (0..3) */
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0x7f
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124static const int grsel_shift[8] = {
125 MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH,
126 MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH,
127 MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH
128};
129
130
131static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
132 [PM_FPU] = { 0x3200000000ul, 0x0100000000ul },
133 [PM_ISU0] = { 0x0200000000ul, 0x0080000000ul },
134 [PM_ISU1] = { 0x3200000000ul, 0x3100000000ul },
135 [PM_IFU] = { 0x3200000000ul, 0x2100000000ul },
136 [PM_IDU] = { 0x0e00000000ul, 0x0040000000ul },
137 [PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul },
138};
139
140static int power5p_get_constraint(u64 event, unsigned long *maskp,
141 unsigned long *valp)
142{
143 int pmc, byte, unit, sh;
144 int bit, fmask;
145 unsigned long mask = 0, value = 0;
146
147 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
148 if (pmc) {
149 if (pmc > 6)
150 return -1;
151 sh = (pmc - 1) * 2;
152 mask |= 2 << sh;
153 value |= 1 << sh;
154 if (pmc >= 5 && !(event == 0x500009 || event == 0x600005))
155 return -1;
156 }
157 if (event & PM_BUSEVENT_MSK) {
158 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
159 if (unit > PM_LASTUNIT)
160 return -1;
161 if (unit == PM_ISU0_ALT)
162 unit = PM_ISU0;
163 mask |= unit_cons[unit][0];
164 value |= unit_cons[unit][1];
165 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
166 if (byte >= 4) {
167 if (unit != PM_LSU1)
168 return -1;
169
170 ++unit;
171 byte &= 3;
172 }
173 if (unit == PM_GRS) {
174 bit = event & 7;
175 fmask = (bit == 6)? 7: 3;
176 sh = grsel_shift[bit];
177 mask |= (unsigned long)fmask << sh;
178 value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
179 << sh;
180 }
181
182 mask |= 0xfUL << (24 - 4 * byte);
183 value |= (unsigned long)unit << (24 - 4 * byte);
184 }
185 if (pmc < 5) {
186
187 mask |= 0x8000000000000ul;
188 value |= 0x1000000000000ul;
189 }
190 *maskp = mask;
191 *valp = value;
192 return 0;
193}
194
195static int power5p_limited_pmc_event(u64 event)
196{
197 int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
198
199 return pmc == 5 || pmc == 6;
200}
201
#define MAX_ALT	3	/* at most 3 alternatives for any event */

/*
 * Groups of event codes that are alternatives for one another.  Unused
 * trailing slots in a row are 0.
 *
 * find_alternative() stops scanning at the first row whose first entry
 * is greater than the event looked up, so rows must stay sorted by
 * their first entry, and that entry must be the smallest in its row.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x100c0,  0x40001f, 0 },
	{ 0x120e4,  0x400002, 0 },
	{ 0x230e2,  0x323087, 0 },
	{ 0x230e3,  0x223087, 0x3230a0 },
	{ 0x410c7,  0x441084, 0 },
	{ 0x800c4,  0xc20e0,  0 },
	{ 0xc50c6,  0xc60e0,  0 },
	{ 0x100005, 0x600005, 0 },
	{ 0x100009, 0x200009, 0 },
	{ 0x200015, 0x300015, 0 },
	{ 0x300009, 0x400009, 0 },
};
217
218
219
220
221
222static int find_alternative(unsigned int event)
223{
224 int i, j;
225
226 for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
227 if (event < event_alternatives[i][0])
228 break;
229 for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
230 if (event == event_alternatives[i][j])
231 return i;
232 }
233 return -1;
234}
235
/*
 * PMCSEL values that are equivalent across PMCs.  The row is the PMC
 * number minus one; the PMCSEL in column j for PMC n corresponds to
 * the PMCSEL in column j for PMC (5 - n), as used by
 * find_alternative_bdecode().
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC4 */
};
242
243
244
245
246
247
248
249static s64 find_alternative_bdecode(u64 event)
250{
251 int pmc, altpmc, pp, j;
252
253 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
254 if (pmc == 0 || pmc > 4)
255 return -1;
256 altpmc = 5 - pmc;
257 pp = event & PM_PMCSEL_MSK;
258 for (j = 0; j < 4; ++j) {
259 if (bytedecode_alternatives[pmc - 1][j] == pp) {
260 return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) |
261 (altpmc << PM_PMC_SH) |
262 bytedecode_alternatives[altpmc - 1][j];
263 }
264 }
265
266
267 if (pmc == 1 && (pp == 0x0d || pp == 0x0e))
268 return event + (2 << PM_PMC_SH) + (0x2e - 0x0d);
269 if (pmc == 3 && (pp == 0x2e || pp == 0x2f))
270 return event - (2 << PM_PMC_SH) - (0x2e - 0x0d);
271
272
273 if (pp == 0x10 || pp == 0x28)
274 return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) |
275 (altpmc << PM_PMC_SH);
276
277 return -1;
278}
279
280static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[])
281{
282 int i, j, nalt = 1;
283 int nlim;
284 s64 ae;
285
286 alt[0] = event;
287 nalt = 1;
288 nlim = power5p_limited_pmc_event(event);
289 i = find_alternative(event);
290 if (i >= 0) {
291 for (j = 0; j < MAX_ALT; ++j) {
292 ae = event_alternatives[i][j];
293 if (ae && ae != event)
294 alt[nalt++] = ae;
295 nlim += power5p_limited_pmc_event(ae);
296 }
297 } else {
298 ae = find_alternative_bdecode(event);
299 if (ae > 0)
300 alt[nalt++] = ae;
301 }
302
303 if (flags & PPMU_ONLY_COUNT_RUN) {
304
305
306
307
308
309
310
311
312
313
314 j = nalt;
315 for (i = 0; i < nalt; ++i) {
316 switch (alt[i]) {
317 case 0xf:
318 alt[j++] = 0x600005;
319 ++nlim;
320 break;
321 case 0x600005:
322 alt[j++] = 0xf;
323 break;
324 case 0x100009:
325 alt[j++] = 0x500009;
326 ++nlim;
327 break;
328 case 0x500009:
329 alt[j++] = 0x100009;
330 alt[j++] = 0x200009;
331 break;
332 }
333 }
334 nalt = j;
335 }
336
337 if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) {
338
339 j = 0;
340 for (i = 0; i < nalt; ++i) {
341 if (!power5p_limited_pmc_event(alt[i])) {
342 alt[j] = alt[i];
343 ++j;
344 }
345 }
346 nalt = j;
347 } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) {
348
349 j = 0;
350 for (i = 0; i < nalt; ++i) {
351 if (power5p_limited_pmc_event(alt[i])) {
352 alt[j] = alt[i];
353 ++j;
354 }
355 }
356 nalt = j;
357 }
358
359 return nalt;
360}
361
362
363
364
365
366
367
/*
 * Marked-instruction classification of PMCSEL values 0x00-0x27, indexed
 * by PMCSEL and consulted by power5p_marked_instr_event():
 *
 *   bit n (n = 0..4)  the event is a marked-instruction event when the
 *                     PMC field of the event code is n
 *   0x80              the selector is decided from the event-bus
 *                     unit/byte instead (bit 4 of the bus byte)
 *
 * Entries not listed are 0.  The table is read-only, hence const.
 */
static const unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,
	[0x02] = 0x2,
	[0x03] = 0xe,
	[0x05] = 0x1c,
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,
	[0x13] = 0x10,
	[0x14] = 0x1f,
	[0x15] = 0x2,
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
404
405
406
407
408
409static int power5p_marked_instr_event(u64 event)
410{
411 int pmc, psel;
412 int bit, byte, unit;
413 u32 mask;
414
415 pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
416 psel = event & PM_PMCSEL_MSK;
417 if (pmc >= 5)
418 return 0;
419
420 bit = -1;
421 if (psel < sizeof(direct_event_is_marked)) {
422 if (direct_event_is_marked[psel] & (1 << pmc))
423 return 1;
424 if (direct_event_is_marked[psel] & 0x80)
425 bit = 4;
426 else if (psel == 0x08)
427 bit = pmc - 1;
428 else if (psel == 0x10)
429 bit = 4 - pmc;
430 else if (psel == 0x1b && (pmc == 1 || pmc == 3))
431 bit = 4;
432 } else if ((psel & 0x48) == 0x40) {
433 bit = psel & 7;
434 } else if (psel == 0x28) {
435 bit = pmc - 1;
436 } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) {
437 bit = 4;
438 }
439
440 if (!(event & PM_BUSEVENT_MSK) || bit == -1)
441 return 0;
442
443 byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
444 unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
445 if (unit == PM_LSU0) {
446
447 mask = 0x5dff00;
448 } else if (unit == PM_LSU1 && byte >= 4) {
449 byte -= 4;
450
451 mask = 0x5f11c000;
452 } else
453 return 0;
454
455 return (mask >> (byte * 8 + bit)) & 1;
456}
457
458static int power5p_compute_mmcr(u64 event[], int n_ev,
459 unsigned int hwc[], unsigned long mmcr[])
460{
461 unsigned long mmcr1 = 0;
462 unsigned long mmcra = 0;
463 unsigned int pmc, unit, byte, psel;
464 unsigned int ttm;
465 int i, isbus, bit, grsel;
466 unsigned int pmc_inuse = 0;
467 unsigned char busbyte[4];
468 unsigned char unituse[16];
469 int ttmuse;
470
471 if (n_ev > 6)
472 return -1;
473
474
475 memset(busbyte, 0, sizeof(busbyte));
476 memset(unituse, 0, sizeof(unituse));
477 for (i = 0; i < n_ev; ++i) {
478 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
479 if (pmc) {
480 if (pmc > 6)
481 return -1;
482 if (pmc_inuse & (1 << (pmc - 1)))
483 return -1;
484 pmc_inuse |= 1 << (pmc - 1);
485 }
486 if (event[i] & PM_BUSEVENT_MSK) {
487 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
488 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
489 if (unit > PM_LASTUNIT)
490 return -1;
491 if (unit == PM_ISU0_ALT)
492 unit = PM_ISU0;
493 if (byte >= 4) {
494 if (unit != PM_LSU1)
495 return -1;
496 ++unit;
497 byte &= 3;
498 }
499 if (busbyte[byte] && busbyte[byte] != unit)
500 return -1;
501 busbyte[byte] = unit;
502 unituse[unit] = 1;
503 }
504 }
505
506
507
508
509
510
511
512 if (unituse[PM_ISU0] &
513 (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) {
514 unituse[PM_ISU0_ALT] = 1;
515 unituse[PM_ISU0] = 0;
516 }
517
518 ttmuse = 0;
519 for (i = PM_FPU; i <= PM_ISU1; ++i) {
520 if (!unituse[i])
521 continue;
522 if (ttmuse++)
523 return -1;
524 mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
525 }
526 ttmuse = 0;
527 for (; i <= PM_GRS; ++i) {
528 if (!unituse[i])
529 continue;
530 if (ttmuse++)
531 return -1;
532 mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
533 }
534 if (ttmuse > 1)
535 return -1;
536
537
538 for (byte = 0; byte < 4; ++byte) {
539 unit = busbyte[byte];
540 if (!unit)
541 continue;
542 if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) {
543
544 unit = PM_ISU0_ALT;
545 } else if (unit == PM_LSU1 + 1) {
546
547 mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
548 }
549 ttm = unit >> 2;
550 mmcr1 |= (unsigned long)ttm
551 << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
552 }
553
554
555 for (i = 0; i < n_ev; ++i) {
556 pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
557 unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
558 byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
559 psel = event[i] & PM_PMCSEL_MSK;
560 isbus = event[i] & PM_BUSEVENT_MSK;
561 if (!pmc) {
562
563 for (pmc = 0; pmc < 4; ++pmc) {
564 if (!(pmc_inuse & (1 << pmc)))
565 break;
566 }
567 if (pmc >= 4)
568 return -1;
569 pmc_inuse |= 1 << pmc;
570 } else if (pmc <= 4) {
571
572 --pmc;
573 if (isbus && (byte & 2) &&
574 (psel == 8 || psel == 0x10 || psel == 0x28))
575
576 mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
577 } else {
578
579 --pmc;
580 }
581 if (isbus && unit == PM_GRS) {
582 bit = psel & 7;
583 grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
584 mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
585 }
586 if (power5p_marked_instr_event(event[i]))
587 mmcra |= MMCRA_SAMPLE_ENABLE;
588 if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1))
589
590 psel |= 0x10;
591 if (pmc <= 3)
592 mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc);
593 hwc[i] = pmc;
594 }
595
596
597 mmcr[0] = 0;
598 if (pmc_inuse & 1)
599 mmcr[0] = MMCR0_PMC1CE;
600 if (pmc_inuse & 0x3e)
601 mmcr[0] |= MMCR0_PMCjCE;
602 mmcr[1] = mmcr1;
603 mmcr[2] = mmcra;
604 return 0;
605}
606
/*
 * Clear the PMCSEL field of a counter in the MMCR1 image (mmcr[1]).
 *
 * @pmc is the zero-based PMC index.  Only indices 0-3 have a PMCSEL
 * field; for any other index nothing is changed.
 */
static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
{
	unsigned long sel_field;

	if (pmc > 3)
		return;

	sel_field = 0x7fUL << MMCR1_PMCSEL_SH(pmc);
	mmcr[1] &= ~sel_field;
}
612
613static int power5p_generic_events[] = {
614 [PERF_COUNT_HW_CPU_CYCLES] = 0xf,
615 [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
616 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8,
617 [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088,
618 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4,
619 [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5,
620};
621
622#define C(x) PERF_COUNT_HW_CACHE_##x
623
624
625
626
627
628
629static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
630 [C(L1D)] = {
631 [C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
632 [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
633 [C(OP_PREFETCH)] = { 0xc70e7, -1 },
634 },
635 [C(L1I)] = {
636 [C(OP_READ)] = { 0, 0 },
637 [C(OP_WRITE)] = { -1, -1 },
638 [C(OP_PREFETCH)] = { 0, 0 },
639 },
640 [C(LL)] = {
641 [C(OP_READ)] = { 0, 0 },
642 [C(OP_WRITE)] = { 0, 0 },
643 [C(OP_PREFETCH)] = { 0xc50c3, 0 },
644 },
645 [C(DTLB)] = {
646 [C(OP_READ)] = { 0xc20e4, 0x800c4 },
647 [C(OP_WRITE)] = { -1, -1 },
648 [C(OP_PREFETCH)] = { -1, -1 },
649 },
650 [C(ITLB)] = {
651 [C(OP_READ)] = { 0, 0x800c0 },
652 [C(OP_WRITE)] = { -1, -1 },
653 [C(OP_PREFETCH)] = { -1, -1 },
654 },
655 [C(BPU)] = {
656 [C(OP_READ)] = { 0x230e4, 0x230e5 },
657 [C(OP_WRITE)] = { -1, -1 },
658 [C(OP_PREFETCH)] = { -1, -1 },
659 },
660};
661
662static struct power_pmu power5p_pmu = {
663 .name = "POWER5+/++",
664 .n_counter = 6,
665 .max_alternatives = MAX_ALT,
666 .add_fields = 0x7000000000055ul,
667 .test_adder = 0x3000040000000ul,
668 .compute_mmcr = power5p_compute_mmcr,
669 .get_constraint = power5p_get_constraint,
670 .get_alternatives = power5p_get_alternatives,
671 .disable_pmc = power5p_disable_pmc,
672 .limited_pmc_event = power5p_limited_pmc_event,
673 .flags = PPMU_LIMITED_PMC5_6,
674 .n_generic = ARRAY_SIZE(power5p_generic_events),
675 .generic_events = power5p_generic_events,
676 .cache_events = &power5p_cache_events,
677};
678
679static int init_power5p_pmu(void)
680{
681 if (!cur_cpu_spec->oprofile_cpu_type ||
682 (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
683 && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")))
684 return -ENODEV;
685
686 return register_power_pmu(&power5p_pmu);
687}
688
689arch_initcall(init_power5p_pmu);
690