// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology (RDT)
 * - Monitoring code
 *
 * Copyright (C) 2017 Intel Corporation
 *
 * This uses the MSR_IA32_QM_EVTSEL and MSR_IA32_QM_CTR MSRs to read the
 * RDT monitoring counters (LLC occupancy and memory bandwidth) and
 * implements the free/limbo RMID bookkeeping built on top of them.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "internal.h"

struct rmid_entry {
	u32			rmid;
	int			busy;
	struct list_head	list;
};

/*
 * @rmid_free_lru - A least recently used list of free RMIDs.
 *     These RMIDs are guaranteed to have an occupancy less than the
 *     threshold occupancy.
 */
static LIST_HEAD(rmid_free_lru);

/*
 * @rmid_limbo_count - Count of currently unused but (potentially) dirty
 *     RMIDs. These are RMIDs that no one is using anymore but whose LLC
 *     occupancy may still be above resctrl_cqm_threshold, so they cannot
 *     be handed out again yet.
 */
static unsigned int rmid_limbo_count;

/*
 * @rmid_ptrs - Array of rmid_entry structures, one per RMID. An entry is
 *     linked on rmid_free_lru while its RMID is free; busy (limbo) RMIDs
 *     are tracked via the per-domain rmid_busy_llc bitmaps.
 */
static struct rmid_entry	*rmid_ptrs;

/*
 * Global boolean for rdt_monitor which is true if any
 * resource monitoring is enabled.
 */
bool rdt_mon_capable;

/*
 * Global to indicate which monitoring events are enabled.
 */
unsigned int rdt_mon_features;

/*
 * This is the threshold cache occupancy (in hardware counter units, i.e.
 * multiples of boot_cpu_data.x86_cache_occ_scale bytes) below which an
 * RMID is considered available for re-allocation.
 */
unsigned int resctrl_cqm_threshold;

#define CF(cf)	((unsigned long)(1048576 * (cf) + 0.5))

/*
 * MBM correction factor table. If an RMID is above the matching
 * rmidthreshold, MBM total and local values read for it are multiplied
 * by the correction factor.
 *
 * The table is indexed by (x86_cache_max_rmid + 1) / 8 - 1 (see
 * intel_rdt_mbm_apply_quirk()) and the correction factor is normalized
 * to 2^20 (1048576), so a corrected value is computed as:
 *
 *   corrected_value = (original_value * correction_factor) >> 20
 */
static const struct mbm_correction_factor_table {
	u32 rmidthreshold;
	u64 cf;
} mbm_cf_table[] __initconst = {
	{7,	CF(1.000000)},
	{15,	CF(1.000000)},
	{15,	CF(0.969650)},
	{31,	CF(1.000000)},
	{31,	CF(1.066667)},
	{31,	CF(0.969650)},
	{47,	CF(1.142857)},
	{63,	CF(1.000000)},
	{63,	CF(1.185115)},
	{63,	CF(1.066553)},
	{79,	CF(1.454545)},
	{95,	CF(1.000000)},
	{95,	CF(1.230769)},
	{95,	CF(1.142857)},
	{95,	CF(1.066667)},
	{127,	CF(1.000000)},
	{127,	CF(1.254863)},
	{127,	CF(1.185255)},
	{151,	CF(1.000000)},
	{127,	CF(1.066667)},
	{167,	CF(1.000000)},
	{159,	CF(1.454334)},
	{183,	CF(1.000000)},
	{127,	CF(0.969744)},
	{191,	CF(1.280246)},
	{191,	CF(1.230921)},
	{215,	CF(1.000000)},
	{191,	CF(1.143118)},
};

static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
static u64 mbm_cf __read_mostly;
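
/*
 * Apply the MBM correction factor: counts read for RMIDs above
 * mbm_cf_rmidthreshold are scaled by mbm_cf, which is normalized to
 * 2^20 (hence the shift by 20). The default threshold of UINT_MAX
 * means no correction is applied unless a quirk selects one.
 */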
static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
{
	if (rmid > mbm_cf_rmidthreshold)
		val = (val * mbm_cf) >> 20;

	return val;
}

static inline struct rmid_entry *__rmid_entry(u32 rmid)
{
	struct rmid_entry *entry;

	entry = &rmid_ptrs[rmid];
	WARN_ON(entry->rmid != rmid);

	return entry;
}

static u64 __rmid_read(u32 rmid, u32 eventid)
{
	u64 val;

	/*
	 * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is
	 * configured with a valid event code for a supported resource
	 * type and IA32_QM_EVTSEL.RMID (bits 41:32) is configured with a
	 * valid RMID, IA32_QM_CTR.data (bits 61:0) reports the monitored
	 * data. IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable
	 * (bit 62) are error bits.
	 */
	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	return val;
}

static bool rmid_dirty(struct rmid_entry *entry)
{
	u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);

	return val >= resctrl_cqm_threshold;
}

/*
 * Check the RMIDs that are marked as busy for this domain. If the
 * reported LLC occupancy is below the threshold clear the busy bit and
 * decrement the count. If the busy count gets to zero on an RMID, we
 * free the RMID.
 */
void __check_limbo(struct rdt_domain *d, bool force_free)
{
	struct rmid_entry *entry;
	struct rdt_resource *r;
	u32 crmid = 1, nrmid;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	/*
	 * Skip RMID 0 and start from RMID 1, checking all the RMIDs that
	 * are marked as busy for occupancy < threshold. If the occupancy
	 * is less than the threshold decrement the busy counter of the
	 * RMID and move it to the free list when the counter reaches 0.
	 */
	for (;;) {
		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
		if (nrmid >= r->num_rmid)
			break;

		entry = __rmid_entry(nrmid);
		if (force_free || !rmid_dirty(entry)) {
			clear_bit(entry->rmid, d->rmid_busy_llc);
			if (!--entry->busy) {
				rmid_limbo_count--;
				list_add_tail(&entry->list, &rmid_free_lru);
			}
		}
		crmid = nrmid + 1;
	}
}
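
/*
 * Return true if the domain still has busy RMIDs, i.e. bits set in its
 * rmid_busy_llc bitmap.
 */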
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
	return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
}

/*
 * As of now the RMIDs allocation is global.
 * However we keep track of which packages the RMIDs
 * are used to optimize the limbo list management.
 */
int alloc_rmid(void)
{
	struct rmid_entry *entry;

	lockdep_assert_held(&rdtgroup_mutex);

	if (list_empty(&rmid_free_lru))
		return rmid_limbo_count ? -EBUSY : -ENOSPC;

	entry = list_first_entry(&rmid_free_lru,
				 struct rmid_entry, list);
	list_del(&entry->list);

	return entry->rmid;
}

static void add_rmid_to_limbo(struct rmid_entry *entry)
{
	struct rdt_resource *r;
	struct rdt_domain *d;
	int cpu;
	u64 val;

	r = &rdt_resources_all[RDT_RESOURCE_L3];

	entry->busy = 0;
	cpu = get_cpu();
	list_for_each_entry(d, &r->domains, list) {
		if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
			val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
			if (val <= resctrl_cqm_threshold)
				continue;
		}

		/*
		 * For the first limbo RMID in the domain,
		 * setup up the limbo worker.
		 */
		if (!has_busy_rmid(r, d))
			cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
		set_bit(entry->rmid, d->rmid_busy_llc);
		entry->busy++;
	}
	put_cpu();

	if (entry->busy)
		rmid_limbo_count++;
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}
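
/*
 * Return an RMID to the pool. If LLC occupancy monitoring is enabled the
 * RMID may still have cache lines tagged to it, so it is parked in limbo
 * until its occupancy drops below resctrl_cqm_threshold; otherwise it
 * goes straight back to the free list.
 */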
void free_rmid(u32 rmid)
{
	struct rmid_entry *entry;

	if (!rmid)
		return;

	lockdep_assert_held(&rdtgroup_mutex);

	entry = __rmid_entry(rmid);

	if (is_llc_occupancy_enabled())
		add_rmid_to_limbo(entry);
	else
		list_add_tail(&entry->list, &rmid_free_lru);
}
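
/*
 * MBM counters are free running and narrower than 64 bits, so compute
 * the delta between two reads modulo the counter width: shifting both
 * values up by (64 - width) discards stale high bits and makes the
 * subtraction wrap correctly when the hardware counter overflows.
 */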
static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
{
	u64 shift = 64 - width, chunks;

	chunks = (cur_msr << shift) - (prev_msr << shift);
	return chunks >> shift;
}
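
/*
 * Read one event counter for @rmid and fold it into @rr->val. LLC
 * occupancy is reported directly; MBM events are accumulated as a
 * monotonically increasing chunk count in the per-domain mbm_state so
 * that counter wraparound between reads is handled.
 */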
static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
{
	struct mbm_state *m;
	u64 chunks, tval;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return tval;

	switch (rr->evtid) {
	case QOS_L3_OCCUP_EVENT_ID:
		rr->val += tval;
		return 0;
	case QOS_L3_MBM_TOTAL_EVENT_ID:
		m = &rr->d->mbm_total[rmid];
		break;
	case QOS_L3_MBM_LOCAL_EVENT_ID:
		m = &rr->d->mbm_local[rmid];
		break;
	default:
		/*
		 * Code would never reach here because an invalid
		 * event id would fail the __rmid_read.
		 */
		return RMID_VAL_ERROR;
	}

	if (rr->first) {
		memset(m, 0, sizeof(struct mbm_state));
		m->prev_bw_msr = m->prev_msr = tval;
		return 0;
	}

	chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width);
	m->chunks += chunks;
	m->prev_msr = tval;

	rr->val += get_corrected_mbm_count(rmid, m->chunks);

	return 0;
}

/*
 * Supporting function to calculate the memory bandwidth
 * and delta bandwidth in MBps.
 */
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
	struct mbm_state *m = &rr->d->mbm_local[rmid];
	u64 tval, cur_bw, chunks;

	tval = __rmid_read(rmid, rr->evtid);
	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return;

	chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
	cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20;

	if (m->delta_comp)
		m->delta_bw = abs(cur_bw - m->prev_bw);
	m->delta_comp = false;
	m->prev_bw = cur_bw;
	m->prev_bw_msr = tval;
}

/*
 * This is called via IPI to read the CQM/MBM counters
 * on a domain.
 */
void mon_event_count(void *info)
{
	struct rdtgroup *rdtgrp, *entry;
	struct rmid_read *rr = info;
	struct list_head *head;
	u64 ret_val;

	rdtgrp = rr->rgrp;

	ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);

	/*
	 * For Ctrl groups read data from child monitor groups and
	 * add them together. Count events which are read successfully.
	 * Discard the rmid_reads reporting errors.
	 */
	head = &rdtgrp->mon.crdtgrp_list;

	if (rdtgrp->type == RDTCTRL_GROUP) {
		list_for_each_entry(entry, head, mon.crdtgrp_list) {
			if (__mon_event_count(entry->mon.rmid, rr) == 0)
				ret_val = 0;
		}
	}

	/* Report an error only if none of the rmid_reads succeeded */
	if (ret_val)
		rr->val = ret_val;
}

/*
 * Feedback loop for MBA software controller (mba_sc)
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth throttling values via the IA32_MBA_THRTL MSRs so
 * that the measured bandwidth (cur_bw) of a control group stays below
 * the user specified bandwidth (user_bw).
 *
 * This builds on the fact that resctrl rdtgroups can have both
 * monitoring and control, and it piggybacks on the MBM overflow timer,
 * so the control values are re-evaluated roughly once per second.
 *
 * delta_bw, the change in bandwidth observed after the previous
 * adjustment, is used when increasing the allocation so that the loop
 * does not flip-flop: the allocation is only raised when cur_bw plus
 * the expected gain (delta_bw) still stays below user_bw.
 */
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
	struct mbm_state *pmbm_data, *cmbm_data;
	u32 cur_bw, delta_bw, user_bw;
	struct rdt_resource *r_mba;
	struct rdt_domain *dom_mba;
	struct list_head *head;
	struct rdtgroup *entry;

	if (!is_mbm_local_enabled())
		return;

	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
	closid = rgrp->closid;
	rmid = rgrp->mon.rmid;
	pmbm_data = &dom_mbm->mbm_local[rmid];

	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
	if (!dom_mba) {
		pr_warn_once("Failure to get domain for MBA update\n");
		return;
	}

	cur_bw = pmbm_data->prev_bw;
	user_bw = dom_mba->mbps_val[closid];
	delta_bw = pmbm_data->delta_bw;
	cur_msr_val = dom_mba->ctrl_val[closid];

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rgrp->mon.crdtgrp_list;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cur_bw += cmbm_data->prev_bw;
		delta_bw += cmbm_data->delta_bw;
	}

	/*
	 * Scale up/down the bandwidth linearly for the ctrl group. The
	 * bandwidth step is the bandwidth granularity specified by the
	 * hardware.
	 *
	 * Only increase the allocation if cur_bw + delta_bw stays below
	 * the user specified bandwidth, so the loop does not continuously
	 * bounce between two control values.
	 */
	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
	} else if (cur_msr_val < MAX_MBA_BW &&
		   (user_bw > (cur_bw + delta_bw))) {
		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
	} else {
		return;
	}

	cur_msr = r_mba->msr_base + closid;
	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
	dom_mba->ctrl_val[closid] = new_msr_val;

	/*
	 * Delta values are updated dynamically, package wise, for each
	 * rdtgrp every time a new bandwidth is calculated: flag the
	 * mbm_state so the next mbm_bw_count() recomputes delta_bw with
	 * the new control value in effect.
	 */
	pmbm_data->delta_comp = true;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
		cmbm_data->delta_comp = true;
	}
}
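
/*
 * Read the enabled MBM events for @rmid on domain @d and, when the MBA
 * software controller is active, refresh the bandwidth figures consumed
 * by update_mba_bw().
 */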
static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
{
	struct rmid_read rr;

	rr.first = false;
	rr.r = r;
	rr.d = d;

	/*
	 * This is protected from concurrent reads from user
	 * as both the user and we hold the global mutex.
	 */
	if (is_mbm_total_enabled()) {
		rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
		__mon_event_count(rmid, &rr);
	}
	if (is_mbm_local_enabled()) {
		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
		__mon_event_count(rmid, &rr);

		/*
		 * Call the MBA software controller only for the
		 * control groups and when user has enabled
		 * the software controller explicitly.
		 */
		if (is_mba_sc(NULL))
			mbm_bw_count(rmid, &rr);
	}
}

/*
 * Handler to scan the limbo list and move the RMIDs
 * to free list whose occupancy < threshold_occupancy.
 */
void cqm_handle_limbo(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
	int cpu = smp_processor_id();
	struct rdt_resource *r;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	r = &rdt_resources_all[RDT_RESOURCE_L3];
	d = container_of(work, struct rdt_domain, cqm_limbo.work);

	__check_limbo(d, false);

	if (has_busy_rmid(r, d))
		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);

	mutex_unlock(&rdtgroup_mutex);
}
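
/*
 * Schedule the limbo worker for @dom on one of the domain's CPUs after
 * @delay_ms milliseconds, recording the chosen CPU in cqm_work_cpu.
 */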
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	cpu = cpumask_any(&dom->cpu_mask);
	dom->cqm_work_cpu = cpu;

	schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
}
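
/*
 * Periodic worker that reads the MBM counters of every rdtgroup on this
 * domain, so the free-running hardware counters never wrap more than
 * once between two reads, and that drives the MBA software controller.
 */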
void mbm_handle_overflow(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
	struct rdtgroup *prgrp, *crgrp;
	int cpu = smp_processor_id();
	struct list_head *head;
	struct rdt_resource *r;
	struct rdt_domain *d;

	mutex_lock(&rdtgroup_mutex);

	if (!static_branch_likely(&rdt_mon_enable_key))
		goto out_unlock;

	r = &rdt_resources_all[RDT_RESOURCE_L3];
	d = container_of(work, struct rdt_domain, mbm_over.work);

	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		mbm_update(r, d, prgrp->mon.rmid);

		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
			mbm_update(r, d, crgrp->mon.rmid);

		if (is_mba_sc(NULL))
			update_mba_bw(prgrp, d);
	}

	schedule_delayed_work_on(cpu, &d->mbm_over, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}
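
/*
 * Kick off the MBM overflow worker for @dom on one of the domain's CPUs
 * after @delay_ms milliseconds, recording the chosen CPU in mbm_work_cpu.
 */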
void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
{
	unsigned long delay = msecs_to_jiffies(delay_ms);
	int cpu;

	if (!static_branch_likely(&rdt_mon_enable_key))
		return;
	cpu = cpumask_any(&dom->cpu_mask);
	dom->mbm_work_cpu = cpu;
	schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}
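
/*
 * Allocate the rmid_ptrs[] array, put every RMID on the free list and
 * then reserve RMID 0, which is always allocated and used for default
 * monitoring of all tasks.
 */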
static int dom_data_init(struct rdt_resource *r)
{
	struct rmid_entry *entry = NULL;
	int i, nr_rmids;

	nr_rmids = r->num_rmid;
	rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs)
		return -ENOMEM;

	for (i = 0; i < nr_rmids; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		entry->rmid = i;
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RMID 0 is special and is always allocated. It's used for all
	 * tasks monitoring.
	 */
	entry = __rmid_entry(0);
	list_del(&entry->list);

	return 0;
}
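
/*
 * Event descriptors for the L3 monitoring events reported via resctrl.
 * Which of them are added to the resource's event list depends on what
 * the hardware supports (see l3_mon_evt_init()).
 */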
static struct mon_evt llc_occupancy_event = {
	.name	= "llc_occupancy",
	.evtid	= QOS_L3_OCCUP_EVENT_ID,
};

static struct mon_evt mbm_total_event = {
	.name	= "mbm_total_bytes",
	.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
	.name	= "mbm_local_bytes",
	.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}
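
/*
 * Read the L3 monitoring capabilities enumerated by the CPU (number of
 * RMIDs, occupancy scale, MBM counter width), derive the occupancy
 * threshold used for RMID reuse and register the supported events.
 */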
int rdt_get_mon_l3_config(struct rdt_resource *r)
{
	unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
	unsigned int cl_size = boot_cpu_data.x86_cache_size;
	int ret;

	r->mon_scale = boot_cpu_data.x86_cache_occ_scale;
	r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
	r->mbm_width = MBM_CNTR_WIDTH_BASE;

	if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
		r->mbm_width += mbm_offset;
	else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
		pr_warn("Ignoring impossible MBM counter offset\n");

	/*
	 * A reasonable upper limit on the max threshold is the number
	 * of lines tagged per RMID if all RMIDs have the same number of
	 * lines tagged in the LLC.
	 *
	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
	 */
	resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid;

	/* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */
	resctrl_cqm_threshold /= r->mon_scale;

	ret = dom_data_init(r);
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	r->mon_capable = true;
	r->mon_enabled = true;

	return 0;
}
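
/*
 * Select the MBM correction factor for this part, indexed by
 * (x86_cache_max_rmid + 1) / 8 - 1. Parts not covered by the table keep
 * the defaults (threshold UINT_MAX, i.e. no correction applied).
 */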
void __init intel_rdt_mbm_apply_quirk(void)
{
	int cf_index;

	cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
	if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
		pr_info("No MBM correction factor available\n");
		return;
	}

	mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
	mbm_cf = mbm_cf_table[cf_index].cf;
}