// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * cgroups support for the BFQ I/O scheduler.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/cgroup.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>

#include "elevator.h"
#include "bfq-iosched.h"

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static void bfq_stat_exit(struct bfq_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

/**
 * bfq_stat_add - add a value to a bfq_stat
 * @stat: target bfq_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller must ensure that IRQ on the same CPU
 * don't mix up with the update.
 */
static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * bfq_stat_read - read the current value of a bfq_stat
 * @stat: bfq_stat to read
 */
static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

/**
 * bfq_stat_reset - reset a bfq_stat
 * @stat: bfq_stat to reset
 */
static inline void bfq_stat_reset(struct bfq_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

/**
 * bfq_stat_add_aux - add a bfq_stat into another's aux count
 * @to: the destination bfq_stat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void bfq_stat_add_aux(struct bfq_stat *to,
				    struct bfq_stat *from)
{
	atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

/**
 * blkg_prfill_stat - prfill callback for bfq_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the bfq_stat in @pd
 *
 * prfill callback for printing a bfq_stat.
 */
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
}

/* bfqg stats flags */
enum bfqg_stats_flags {
	BFQG_stats_waiting = 0,
	BFQG_stats_idling,
	BFQG_stats_empty,
};

#define BFQG_FLAG_FNS(name)						\
static void bfqg_stats_mark_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags |= (1 << BFQG_stats_##name);			\
}									\
static void bfqg_stats_clear_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags &= ~(1 << BFQG_stats_##name);			\
}									\
static int bfqg_stats_##name(struct bfqg_stats *stats)			\
{									\
	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
}									\

BFQG_FLAG_FNS(waiting)
BFQG_FLAG_FNS(idling)
BFQG_FLAG_FNS(empty)
#undef BFQG_FLAG_FNS

/* This should be called with the scheduler lock held. */
static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_waiting(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_group_wait_time)
		bfq_stat_add(&stats->group_wait_time,
			     now - stats->start_group_wait_time);
	bfqg_stats_clear_waiting(stats);
}

/* This should be called with the scheduler lock held. */
static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
						 struct bfq_group *curr_bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_waiting(stats))
		return;
	if (bfqg == curr_bfqg)
		return;
	stats->start_group_wait_time = ktime_get_ns();
	bfqg_stats_mark_waiting(stats);
}

/* This should be called with the scheduler lock held. */
static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_empty(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_empty_time)
		bfq_stat_add(&stats->empty_time,
			     now - stats->start_empty_time);
	bfqg_stats_clear_empty(stats);
}

void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
{
	bfq_stat_add(&bfqg->stats.dequeue, 1);
}

void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (blkg_rwstat_total(&stats->queued))
		return;

	/*
	 * group is already marked empty. This can happen if bfqq got new
	 * request in parent group and moved to this group while being added
	 * to service tree. Just ignore the event and move on.
	 */
	if (bfqg_stats_empty(stats))
		return;

	stats->start_empty_time = ktime_get_ns();
	bfqg_stats_mark_empty(stats);
}

void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_idling(stats)) {
		u64 now = ktime_get_ns();

		if (now > stats->start_idle_time)
			bfq_stat_add(&stats->idle_time,
				     now - stats->start_idle_time);
		bfqg_stats_clear_idling(stats);
	}
}

void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	stats->start_idle_time = ktime_get_ns();
	bfqg_stats_mark_idling(stats);
}

void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	bfq_stat_add(&stats->avg_queue_size_sum,
		     blkg_rwstat_total(&stats->queued));
	bfq_stat_add(&stats->avg_queue_size_samples, 1);
	bfqg_stats_update_group_wait_time(stats);
}

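/*
 * Account a newly queued request for @bfqg: bump the queued rwstat,
 * terminate any "empty" period, and, if the request belongs to a queue
 * that is not being served, start measuring the group wait time.
 */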
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      blk_opf_t opf)
{
	blkg_rwstat_add(&bfqg->stats.queued, opf, 1);
	bfqg_stats_end_empty_time(&bfqg->stats);
	if (!(bfqq == bfqg->bfqd->in_service_queue))
		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}

void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf)
{
	blkg_rwstat_add(&bfqg->stats.queued, opf, -1);
}

void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf)
{
	blkg_rwstat_add(&bfqg->stats.merged, opf, 1);
}

void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, blk_opf_t opf)
{
	struct bfqg_stats *stats = &bfqg->stats;
	u64 now = ktime_get_ns();

	if (now > io_start_time_ns)
		blkg_rwstat_add(&stats->service_time, opf,
				now - io_start_time_ns);
	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, opf,
				io_start_time_ns - start_time_ns);
}

#else /* CONFIG_BFQ_CGROUP_DEBUG */

void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, blk_opf_t opf) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }

#endif /* CONFIG_BFQ_CGROUP_DEBUG */

#ifdef CONFIG_BFQ_GROUP_IOSCHED

/*
 * blk-cgroup policy-related handlers
 * The following functions help in converting between blk-cgroup
 * internal structures and BFQ-specific structures.
 */

static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
}

struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
{
	return pd_to_blkg(&bfqg->pd);
}

static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
}

/*
 * bfq_group handlers
 * The following functions help in navigating the bfq_group hierarchy
 * by allowing to find the parent of a bfq_group or the bfq_group
 * associated with a bfq_queue.
 */

static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
{
	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;

	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	struct bfq_entity *group_entity = bfqq->entity.parent;

	return group_entity ? container_of(group_entity, struct bfq_group,
					   entity) :
			      bfqq->bfqd->root_group;
}

/*
 * The following two functions handle get and put of a bfq_group by
 * wrapping the related blkg reference counting.
 */

static void bfqg_get(struct bfq_group *bfqg)
{
	refcount_inc(&bfqg->ref);
}

static void bfqg_put(struct bfq_group *bfqg)
{
	if (refcount_dec_and_test(&bfqg->ref))
		kfree(bfqg);
}

static void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
	/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
	bfqg_get(bfqg);

	blkg_get(bfqg_to_blkg(bfqg));
}

void bfqg_and_blkg_put(struct bfq_group *bfqg)
{
	blkg_put(bfqg_to_blkg(bfqg));

	bfqg_put(bfqg);
}

void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
{
	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);

	if (!bfqg)
		return;

	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
}

/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_reset(&stats->merged);
	blkg_rwstat_reset(&stats->service_time);
	blkg_rwstat_reset(&stats->wait_time);
	bfq_stat_reset(&stats->time);
	bfq_stat_reset(&stats->avg_queue_size_sum);
	bfq_stat_reset(&stats->avg_queue_size_samples);
	bfq_stat_reset(&stats->dequeue);
	bfq_stat_reset(&stats->group_wait_time);
	bfq_stat_reset(&stats->idle_time);
	bfq_stat_reset(&stats->empty_time);
#endif
}

/* @to += @from */
static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
{
	if (!to || !from)
		return;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_add_aux(&to->merged, &from->merged);
	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
	bfq_stat_add_aux(&to->time, &from->time);
	bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
	bfq_stat_add_aux(&to->avg_queue_size_samples,
			 &from->avg_queue_size_samples);
	bfq_stat_add_aux(&to->dequeue, &from->dequeue);
	bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
	bfq_stat_add_aux(&to->idle_time, &from->idle_time);
	bfq_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}

/*
 * Transfer @bfqg's stats to its parent's aux counts so that the ancestors'
 * recursive stats can still account for the blkg regardless of this bfqg's
 * existence.
 */
static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
{
	struct bfq_group *parent;

	if (!bfqg)
		return;

	parent = bfqg_parent(bfqg);

	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);

	if (unlikely(!parent))
		return;

	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
	bfqg_stats_reset(&bfqg->stats);
}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
		/*
		 * Make sure that bfqg and its associated blkg do not
		 * disappear before entity.
		 */
		bfqg_and_blkg_get(bfqg);
	}
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
}

static void bfqg_stats_exit(struct bfqg_stats *stats)
{
	blkg_rwstat_exit(&stats->bytes);
	blkg_rwstat_exit(&stats->ios);
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	blkg_rwstat_exit(&stats->merged);
	blkg_rwstat_exit(&stats->service_time);
	blkg_rwstat_exit(&stats->wait_time);
	blkg_rwstat_exit(&stats->queued);
	bfq_stat_exit(&stats->time);
	bfq_stat_exit(&stats->avg_queue_size_sum);
	bfq_stat_exit(&stats->avg_queue_size_samples);
	bfq_stat_exit(&stats->dequeue);
	bfq_stat_exit(&stats->group_wait_time);
	bfq_stat_exit(&stats->idle_time);
	bfq_stat_exit(&stats->empty_time);
#endif
}

static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->bytes, gfp) ||
	    blkg_rwstat_init(&stats->ios, gfp))
		goto error;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	if (blkg_rwstat_init(&stats->merged, gfp) ||
	    blkg_rwstat_init(&stats->service_time, gfp) ||
	    blkg_rwstat_init(&stats->wait_time, gfp) ||
	    blkg_rwstat_init(&stats->queued, gfp) ||
	    bfq_stat_init(&stats->time, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
	    bfq_stat_init(&stats->dequeue, gfp) ||
	    bfq_stat_init(&stats->group_wait_time, gfp) ||
	    bfq_stat_init(&stats->idle_time, gfp) ||
	    bfq_stat_init(&stats->empty_time, gfp))
		goto error;
#endif

	return 0;

error:
	bfqg_stats_exit(stats);
	return -ENOMEM;
}

static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
{
	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
}

static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
{
	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
}

static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
{
	struct bfq_group_data *bgd;

	bgd = kzalloc(sizeof(*bgd), gfp);
	if (!bgd)
		return NULL;

	bgd->weight = CGROUP_WEIGHT_DFL;
	return &bgd->pd;
}

static void bfq_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(cpd_to_bfqgd(cpd));
}

static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk,
					     struct blkcg *blkcg, gfp_t gfp)
{
	struct bfq_group *bfqg;

	bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id);
	if (!bfqg)
		return NULL;

	if (bfqg_stats_init(&bfqg->stats, gfp)) {
		kfree(bfqg);
		return NULL;
	}

	/* see comments in bfq_bic_update_cgroup for why refcounting */
	refcount_set(&bfqg->ref, 1);
	return &bfqg->pd;
}

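/*
 * Initialize the newly allocated policy data: inherit the blkcg's
 * configured weight and hook the group's entity into BFQ's internal
 * scheduling data structures.
 */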
static void bfq_pd_init(struct blkg_policy_data *pd)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
	struct bfq_entity *entity = &bfqg->entity;
	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);

	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
	entity->my_sched_data = &bfqg->sched_data;
	entity->last_bfqq_created = NULL;

	bfqg->my_entity = entity; /*
				   * the root_group's will be set to NULL
				   * in bfq_init_queue()
				   */
	bfqg->bfqd = bfqd;
	bfqg->active_entities = 0;
	bfqg->num_queues_with_pending_reqs = 0;
	bfqg->rq_pos_tree = RB_ROOT;
}

static void bfq_pd_free(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_exit(&bfqg->stats);
	bfqg_put(bfqg);
}

static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_reset(&bfqg->stats);
}

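/*
 * Link @bfqg's scheduling entity to @parent, so that @bfqg is scheduled
 * within @parent's sched_data in BFQ's internal group hierarchy.
 */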
static void bfq_group_set_parent(struct bfq_group *bfqg,
				 struct bfq_group *parent)
{
	struct bfq_entity *entity;

	entity = &bfqg->entity;
	entity->parent = parent->my_entity;
	entity->sched_data = &parent->sched_data;
}

static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg)
{
	struct bfq_group *parent;
	struct bfq_entity *entity;

	/*
	 * Update chain of bfq_groups as we might be handling a leaf group
	 * which, along with some of its relatives, has not been hooked yet
	 * to the private hierarchy of BFQ.
	 */
	entity = &bfqg->entity;
	for_each_entity(entity) {
		struct bfq_group *curr_bfqg = container_of(entity,
						struct bfq_group, entity);
		if (curr_bfqg != bfqd->root_group) {
			parent = bfqg_parent(curr_bfqg);
			if (!parent)
				parent = bfqd->root_group;
			bfq_group_set_parent(curr_bfqg, parent);
		}
	}
}

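/*
 * Return the bfq_group to charge for @bio: the closest ancestor of the
 * bio's blkg whose BFQ policy data is online, falling back to the root
 * group. The bio is (re)associated with the chosen blkcg.
 */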
struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
	struct blkcg_gq *blkg = bio->bi_blkg;
	struct bfq_group *bfqg;

	while (blkg) {
		if (!blkg->online) {
			blkg = blkg->parent;
			continue;
		}
		bfqg = blkg_to_bfqg(blkg);
		if (bfqg->pd.online) {
			bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
			return bfqg;
		}
		blkg = blkg->parent;
	}
	bio_associate_blkg_from_css(bio,
				&bfqg_to_blkg(bfqd->root_group)->blkcg->css);
	return bfqd->root_group;
}

/**
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one.  Avoid putting the entity on the old group idle tree.
 *
 * Must be called under the scheduler lock, to make sure that the blkg
 * owning @bfqg does not disappear (see comments in
 * bfq_bic_update_cgroup on guaranteeing the consistency of blkg
 * pointers).
 */
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg)
{
	struct bfq_entity *entity = &bfqq->entity;
	struct bfq_group *old_parent = bfqq_group(bfqq);
	bool has_pending_reqs = false;

	/*
	 * No point in moving bfqq to the same group, which can happen
	 * when the root group is offlined.
	 */
	if (old_parent == bfqg)
		return;

	/*
	 * oom_bfqq is not allowed to move: it holds a reference to the
	 * root group until elevator exit.
	 */
	if (bfqq == &bfqd->oom_bfqq)
		return;

	/*
	 * Take an extra reference to keep bfqq alive across the move.
	 */
	bfqq->ref++;

	if (entity->in_groups_with_pending_reqs) {
		has_pending_reqs = true;
		bfq_del_bfqq_in_groups_with_pending_reqs(bfqq);
	}

	/* If bfqq is empty, then bfq_bfqq_expire also invokes
	 * bfq_del_bfqq_busy, thereby removing bfqq and its entity
	 * from data structures related to current group. Otherwise we
	 * need to remove bfqq explicitly with bfq_deactivate_bfqq, as
	 * we do below.
	 */
	if (bfqq == bfqd->in_service_queue)
		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
				false, BFQQE_PREEMPTED);

	if (bfq_bfqq_busy(bfqq))
		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
	else if (entity->on_st_or_in_serv)
		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
	bfqg_and_blkg_put(old_parent);

	if (entity->parent &&
	    entity->parent->last_bfqq_created == bfqq)
		entity->parent->last_bfqq_created = NULL;
	else if (bfqd->last_bfqq_created == bfqq)
		bfqd->last_bfqq_created = NULL;

	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
	/* pin down bfqg and its associated blkg */
	bfqg_and_blkg_get(bfqg);

	if (has_pending_reqs)
		bfq_add_bfqq_in_groups_with_pending_reqs(bfqq);

	if (bfq_bfqq_busy(bfqq)) {
		if (unlikely(!bfqd->nonrot_with_queueing))
			bfq_pos_tree_add_move(bfqd, bfqq);
		bfq_activate_bfqq(bfqd, bfqq);
	}

	if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver)
		bfq_schedule_dispatch(bfqd);
	/* release extra ref taken above, bfqq may happen to be freed now */
	bfq_put_queue(bfqq);
}

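/*
 * Move a sync bfq_queue (and the whole chain of queues it has been
 * merged with, if any) to @bfqg. If some queue in the merge chain has
 * already changed cgroup, the merge is no longer valid: detach @bic
 * from the queue instead, so that a fresh queue gets created later.
 */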
static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
			       struct bfq_queue *sync_bfqq,
			       struct bfq_io_cq *bic,
			       struct bfq_group *bfqg,
			       unsigned int act_idx)
{
	struct bfq_queue *bfqq;

	if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
		/* We are the only user of this bfqq, just move it */
		if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
		return;
	}

	/*
	 * The queue was merged to a different queue. Check
	 * to see if we have dropped through to a different
	 * cgroup.
	 */
	for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
		if (bfqq->entity.sched_data != &bfqg->sched_data)
			break;
	if (bfqq) {
		/*
		 * Some queue changed cgroup so the merge is not valid
		 * anymore. We cannot easily just cancel the merge (by
		 * clearing new_bfqq) as there may be other processes
		 * using this queue and holding refs to all queues
		 * below sync_bfqq->new_bfqq. Similarly if the merge
		 * already happened, we need to detach from bfqq now
		 * so that we cannot merge bio to a request from the
		 * old cgroup.
		 */
		bfq_put_cooperator(sync_bfqq);
		bic_set_bfqq(bic, NULL, true, act_idx);
		bfq_release_process_ref(bfqd, sync_bfqq);
	}
}

/**
 * __bfq_bic_change_cgroup - move @bic to @bfqg.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @bfqg: the group to move to.
 *
 * Move bic to blkcg, assuming that bfqd->lock is held; which makes
 * sure that the reference to cgroup is valid across the call (see
 * comments in bfq_bic_update_cgroup on this issue)
 */
static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
				    struct bfq_io_cq *bic,
				    struct bfq_group *bfqg)
{
	unsigned int act_idx;

	for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
		struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
		struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);

		if (async_bfqq &&
		    async_bfqq->entity.sched_data != &bfqg->sched_data) {
			bic_set_bfqq(bic, NULL, false, act_idx);
			bfq_release_process_ref(bfqd, async_bfqq);
		}

		if (sync_bfqq)
			bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx);
	}
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio);
	uint64_t serial_nr;

	serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr;

	/*
	 * Check whether blkcg has changed.  The condition may trigger
	 * spuriously on a newly created bic but there's no harm.
	 */
	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
		return;

	/*
	 * New cgroup for this process. Make sure that it is linked to bfq
	 * internal cgroup hierarchy.
	 */
	bfq_link_bfqg(bfqd, bfqg);
	__bfq_bic_change_cgroup(bfqd, bic, bfqg);

	/*
	 * Cache blkg_path for the bfq_log_* functions, and update it
	 * here, under bfqd->lock. Operations on blkg objects in
	 * blk-cgroup are protected with the request_queue lock, and not
	 * with bfqd->lock, so in principle a blkg could be torn down
	 * while BFQ is still using it. However, the offline hooks that
	 * destroy a blkg for this policy are themselves executed with
	 * bfqd->lock held, so any blkg reached from bfqg while holding
	 * bfqd->lock is guaranteed to be consistent. Caching the path
	 * here therefore yields an up-to-date string that logging code
	 * can use without dereferencing the blkg again.
	 */
	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
	bic->blkcg_serial_nr = serial_nr;
}

/**
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */
static void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
	struct bfq_entity *entity = st->first_idle;

	for (; entity ; entity = st->first_idle)
		__bfq_deactivate_entity(entity, false);
}

/**
 * bfq_reparent_leaf_entity - move leaf entity to the root_group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to move, if entity is a leaf; or the parent entity
 *	    of an active leaf entity to move, if entity is not a leaf.
 * @ioprio_class: I/O priority class to reparent.
 */
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
				     struct bfq_entity *entity,
				     int ioprio_class)
{
	struct bfq_queue *bfqq;
	struct bfq_entity *child_entity = entity;

	while (child_entity->my_sched_data) { /* leaf not reached yet */
		struct bfq_sched_data *child_sd = child_entity->my_sched_data;
		struct bfq_service_tree *child_st = child_sd->service_tree +
			ioprio_class;
		struct rb_root *child_active = &child_st->active;

		child_entity = bfq_entity_of(rb_first(child_active));

		if (!child_entity)
			child_entity = child_sd->in_service_entity;
	}

	bfqq = bfq_entity_to_bfqq(child_entity);
	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}

/**
 * bfq_reparent_active_queues - move to the root group all active queues.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree to start the search from.
 * @ioprio_class: I/O priority class to reparent.
 */
static void bfq_reparent_active_queues(struct bfq_data *bfqd,
				       struct bfq_group *bfqg,
				       struct bfq_service_tree *st,
				       int ioprio_class)
{
	struct rb_root *active = &st->active;
	struct bfq_entity *entity;

	while ((entity = bfq_entity_of(rb_first(active))))
		bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);

	if (bfqg->sched_data.in_service_entity)
		bfq_reparent_leaf_entity(bfqd,
					 bfqg->sched_data.in_service_entity,
					 ioprio_class);
}

/**
 * bfq_pd_offline - deactivate the entity associated with @pd,
 *		    and reparent its children entities.
 * @pd: descriptor of the policy going offline.
 *
 * blkio already grabs the queue_lock for us, so no need to use
 * RCU-based magic
 */
static void bfq_pd_offline(struct blkg_policy_data *pd)
{
	struct bfq_service_tree *st;
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	struct bfq_data *bfqd = bfqg->bfqd;
	struct bfq_entity *entity = bfqg->my_entity;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&bfqd->lock, flags);

	if (!entity) /* root group */
		goto put_async_queues;

	/*
	 * Empty all service_trees belonging to this group before
	 * deactivating the group itself.
	 */
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
		st = bfqg->sched_data.service_tree + i;

		/*
		 * It may happen that some queues are still active
		 * (busy) upon group destruction (if the corresponding
		 * processes have been forced to terminate). We move
		 * all the leaf entities corresponding to these queues
		 * to the root_group.
		 * Also, it may happen that the group has an entity
		 * in service, which is disconnected from the active
		 * tree: it must be moved, too.
		 * There is no need to put the sync queues, as the
		 * scheduler has taken no reference.
		 */
		bfq_reparent_active_queues(bfqd, bfqg, st, i);

		/*
		 * The idle tree may still contain bfq_queues
		 * belonging to exited tasks, because they never
		 * migrated to a different cgroup from the one being
		 * destroyed now. Flush the idle tree so that no
		 * entity of this group remains on it.
		 */
		bfq_flush_idle_tree(st);
	}

	__bfq_deactivate_entity(entity, false);

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);

	spin_unlock_irqrestore(&bfqd->lock, flags);

	/*
	 * @blkg is going offline and will be ignored by
	 * blkg_[rw]stat_recursive_sum().  Transfer stats to the parent so
	 * that they don't get lost.  If IOs complete after this point, the
	 * stats for them will be lost.  Oh well...
	 */
	bfqg_stats_xfer_dead(bfqg);
}

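/*
 * End weight raising for the async queues of every group attached to
 * this device, including the root group.
 */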
void bfq_end_wr_async(struct bfq_data *bfqd)
{
	struct blkcg_gq *blkg;

	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		bfq_end_wr_async_queues(bfqd, bfqg);
	}
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	unsigned int val = 0;

	if (bfqgd)
		val = bfqgd->weight;

	seq_printf(sf, "%u\n", val);

	return 0;
}

static u64 bfqg_prfill_weight_device(struct seq_file *sf,
				     struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	if (!bfqg->entity.dev_weight)
		return 0;
	return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
}

static int bfq_io_show_weight(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);

	seq_printf(sf, "default %u\n", bfqgd->weight);
	blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
			  &blkcg_policy_bfq, 0, false);
	return 0;
}

static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
{
	weight = dev_weight ?: weight;

	bfqg->entity.dev_weight = dev_weight;
	/*
	 * Setting the prio_changed flag of the entity
	 * to 1 with new_weight == weight would re-set
	 * the value of the weight to its ioprio mapping.
	 * Set the flag only if necessary.
	 */
	if ((unsigned short)weight != bfqg->entity.new_weight) {
		bfqg->entity.new_weight = (unsigned short)weight;
		/*
		 * Make sure that the above new value has been
		 * stored in bfqg->entity.new_weight before
		 * setting the prio_changed flag. In fact,
		 * this flag may be read asynchronously (in
		 * critical sections protected by a different
		 * lock than that held here), and finding this
		 * flag set may cause the execution of the code
		 * for updating parameters whose value may
		 * depend also on bfqg->entity.new_weight (in
		 * __bfq_entity_update_weight_prio).
		 * This barrier makes sure that the new value
		 * of bfqg->entity.new_weight is correctly
		 * seen in that code.
		 */
		smp_wmb();
		bfqg->entity.prio_changed = 1;
	}
}

static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
				    struct cftype *cftype,
				    u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	struct blkcg_gq *blkg;
	int ret = -ERANGE;

	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
		return ret;

	ret = 0;
	spin_lock_irq(&blkcg->lock);
	bfqgd->weight = (unsigned short)val;
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		if (bfqg)
			bfq_group_set_weight(bfqg, val, 0);
	}
	spin_unlock_irq(&blkcg->lock);

	return ret;
}

static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	int ret;
	struct blkg_conf_ctx ctx;
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct bfq_group *bfqg;
	u64 v;

	blkg_conf_init(&ctx, buf);

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
	if (ret)
		goto out;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* require "default" on dfl */
		ret = -ERANGE;
		if (!v)
			goto out;
	} else if (!strcmp(strim(ctx.body), "default")) {
		v = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	bfqg = blkg_to_bfqg(ctx.blkg);

	ret = -ERANGE;
	if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
		bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
		ret = 0;
	}
out:
	blkg_conf_exit(&ctx);
	return ret ?: nbytes;
}

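/*
 * Parse writes to the "bfq.weight" file: a bare value (or
 * "default <value>") sets the blkcg-wide default weight, while a
 * "MAJ:MIN <value>" string is forwarded to the per-device handler.
 */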
static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
				 char *buf, size_t nbytes,
				 loff_t off)
{
	char *endp;
	int ret;
	u64 v;

	buf = strim(buf);

	/* "WEIGHT" or "default WEIGHT" sets the default weight */
	v = simple_strtoull(buf, &endp, 0);
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
		return ret ?: nbytes;
	}

	return bfq_io_set_device_weight(of, buf, nbytes, off);
}

static int bfqg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
	return 0;
}

static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
					struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample sum;

	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, true);
	return 0;
}

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfqg_print_stat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(&blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct bfq_stat *stat;

		if (!pos_blkg->online)
			continue;

		stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
		sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
	}
	rcu_read_unlock();

	return __blkg_prfill_u64(sf, pd, sum);
}

static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
			       int off)
{
	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);

	return __blkg_prfill_u64(sf, pd, sum >> 9);
}

static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
	return 0;
}

static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
					 struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample tmp;

	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
			offsetof(struct bfq_group, stats.bytes), &tmp);

	return __blkg_prfill_u64(sf, pd,
		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
}

static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
			  false);
	return 0;
}

static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
		v = div64_u64(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

/* print avg_queue_size */
static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
			  0, false);
	return 0;
}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */

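/*
 * Activate the bfq blkcg policy on @bfqd's request queue and return the
 * root bfq_group of the resulting hierarchy.
 */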
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	int ret;

	ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq);
	if (ret)
		return NULL;

	return blkg_to_bfqg(bfqd->queue->root_blkg);
}

struct blkcg_policy blkcg_policy_bfq = {
	.dfl_cftypes		= bfq_blkg_files,
	.legacy_cftypes		= bfq_blkcg_legacy_files,

	.cpd_alloc_fn		= bfq_cpd_alloc,
	.cpd_free_fn		= bfq_cpd_free,

	.pd_alloc_fn		= bfq_pd_alloc,
	.pd_init_fn		= bfq_pd_init,
	.pd_offline_fn		= bfq_pd_offline,
	.pd_free_fn		= bfq_pd_free,
	.pd_reset_stats_fn	= bfq_pd_reset_stats,
};

struct cftype bfq_blkcg_legacy_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight_legacy,
		.write_u64 = bfq_io_set_weight_legacy,
	},
	{
		.name = "bfq.weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},

	/* statistics, covers only the tasks in the bfqg */
	{
		.name = "bfq.io_service_bytes",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_serviced",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.sectors",
		.seq_show = bfqg_print_stat_sectors,
	},
	{
		.name = "bfq.io_service_time",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_wait_time",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_merged",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_queued",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat,
	},
#endif

	/* the same statistics which cover the bfqg and its descendants */
	{
		.name = "bfq.io_service_bytes_recursive",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_serviced_recursive",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time_recursive",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat_recursive,
	},
	{
		.name = "bfq.sectors_recursive",
		.seq_show = bfqg_print_stat_sectors_recursive,
	},
	{
		.name = "bfq.io_service_time_recursive",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_wait_time_recursive",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_merged_recursive",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_queued_recursive",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.avg_queue_size",
		.seq_show = bfqg_print_avg_queue_size,
	},
	{
		.name = "bfq.group_wait_time",
		.private = offsetof(struct bfq_group, stats.group_wait_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.idle_time",
		.private = offsetof(struct bfq_group, stats.idle_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.empty_time",
		.private = offsetof(struct bfq_group, stats.empty_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.dequeue",
		.private = offsetof(struct bfq_group, stats.dequeue),
		.seq_show = bfqg_print_stat,
	},
#endif /* CONFIG_BFQ_CGROUP_DEBUG */
	{ }	/* terminate */
};

struct cftype bfq_blkg_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},
	{} /* terminate */
};

#else /* CONFIG_BFQ_GROUP_IOSCHED */

void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg) {}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
	}
	entity->sched_data = &bfqg->sched_data;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
	return bfqd->root_group;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	return bfqq->bfqd->root_group;
}

void bfqg_and_blkg_put(struct bfq_group *bfqg) {}

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	int i;

	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
	if (!bfqg)
		return NULL;

	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	return bfqg;
}
#endif /* CONFIG_BFQ_GROUP_IOSCHED */