/*
 * cgroups support for the BFQ I/O scheduler.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/cgroup.h>
#include <linux/elevator.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/sbitmap.h>
#include <linux/delay.h>

#include "bfq-iosched.h"

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp)
{
	int ret;

	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
	if (ret)
		return ret;

	atomic64_set(&stat->aux_cnt, 0);
	return 0;
}

static void bfq_stat_exit(struct bfq_stat *stat)
{
	percpu_counter_destroy(&stat->cpu_cnt);
}

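/*
 * bfq_stat_add - add a value to a bfq_stat
 * @stat: target bfq_stat
 * @val: value to add
 */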
static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val)
{
	percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
}

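/*
 * bfq_stat_read - read the current value of a bfq_stat
 * @stat: bfq_stat to read
 */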
static inline uint64_t bfq_stat_read(struct bfq_stat *stat)
{
	return percpu_counter_sum_positive(&stat->cpu_cnt);
}

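/*
 * bfq_stat_reset - reset a bfq_stat
 * @stat: bfq_stat to reset
 */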
static inline void bfq_stat_reset(struct bfq_stat *stat)
{
	percpu_counter_set(&stat->cpu_cnt, 0);
	atomic64_set(&stat->aux_cnt, 0);
}

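/*
 * bfq_stat_add_aux - add a bfq_stat into another's aux count
 * @to: the destination bfq_stat
 * @from: the source
 *
 * Add @from's count, including its aux one, to @to's aux count.
 */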
static inline void bfq_stat_add_aux(struct bfq_stat *to,
				    struct bfq_stat *from)
{
	atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt),
		     &to->aux_cnt);
}

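/*
 * blkg_prfill_stat - prfill callback for printing a bfq_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the bfq_stat in @pd
 */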
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off));
}

enum bfqg_stats_flags {
	BFQG_stats_waiting = 0,
	BFQG_stats_idling,
	BFQG_stats_empty,
};

#define BFQG_FLAG_FNS(name)						\
static void bfqg_stats_mark_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags |= (1 << BFQG_stats_##name);			\
}									\
static void bfqg_stats_clear_##name(struct bfqg_stats *stats)		\
{									\
	stats->flags &= ~(1 << BFQG_stats_##name);			\
}									\
static int bfqg_stats_##name(struct bfqg_stats *stats)			\
{									\
	return (stats->flags & (1 << BFQG_stats_##name)) != 0;		\
}									\

BFQG_FLAG_FNS(waiting)
BFQG_FLAG_FNS(idling)
BFQG_FLAG_FNS(empty)
#undef BFQG_FLAG_FNS

static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_waiting(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_group_wait_time)
		bfq_stat_add(&stats->group_wait_time,
			     now - stats->start_group_wait_time);
	bfqg_stats_clear_waiting(stats);
}

static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
						 struct bfq_group *curr_bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_waiting(stats))
		return;
	if (bfqg == curr_bfqg)
		return;
	stats->start_group_wait_time = ktime_get_ns();
	bfqg_stats_mark_waiting(stats);
}

static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
	u64 now;

	if (!bfqg_stats_empty(stats))
		return;

	now = ktime_get_ns();
	if (now > stats->start_empty_time)
		bfq_stat_add(&stats->empty_time,
			     now - stats->start_empty_time);
	bfqg_stats_clear_empty(stats);
}

void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
{
	bfq_stat_add(&bfqg->stats.dequeue, 1);
}

void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (blkg_rwstat_total(&stats->queued))
		return;

	/*
	 * If the group is already marked empty, there is nothing to do:
	 * this can happen if a queue got a new request in the parent group
	 * and was moved to this group while being added to the service tree.
	 */
	if (bfqg_stats_empty(stats))
		return;

	stats->start_empty_time = ktime_get_ns();
	bfqg_stats_mark_empty(stats);
}

void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	if (bfqg_stats_idling(stats)) {
		u64 now = ktime_get_ns();

		if (now > stats->start_idle_time)
			bfq_stat_add(&stats->idle_time,
				     now - stats->start_idle_time);
		bfqg_stats_clear_idling(stats);
	}
}

void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	stats->start_idle_time = ktime_get_ns();
	bfqg_stats_mark_idling(stats);
}

void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
{
	struct bfqg_stats *stats = &bfqg->stats;

	bfq_stat_add(&stats->avg_queue_size_sum,
		     blkg_rwstat_total(&stats->queued));
	bfq_stat_add(&stats->avg_queue_size_samples, 1);
	bfqg_stats_update_group_wait_time(stats);
}

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, 1);
	bfqg_stats_end_empty_time(&bfqg->stats);
	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}

void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.queued, op, -1);
}

void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op)
{
	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
}

void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op)
{
	struct bfqg_stats *stats = &bfqg->stats;
	u64 now = ktime_get_ns();

	if (now > io_start_time_ns)
		blkg_rwstat_add(&stats->service_time, op,
				now - io_start_time_ns);
	if (io_start_time_ns > start_time_ns)
		blkg_rwstat_add(&stats->wait_time, op,
				io_start_time_ns - start_time_ns);
}

#else /* CONFIG_BFQ_CGROUP_DEBUG */

void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
			      unsigned int op) { }
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
				  u64 io_start_time_ns, unsigned int op) { }
void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { }
void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { }

#endif /* CONFIG_BFQ_CGROUP_DEBUG */

#ifdef CONFIG_BFQ_GROUP_IOSCHED

/*
 * blk-cgroup policy-related handlers
 * The following functions help in converting between blk-cgroup
 * internal structures and BFQ-specific structures.
 */

static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
}

struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
{
	return pd_to_blkg(&bfqg->pd);
}

static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
}

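/*
 * bfq_group handlers
 * The following functions help in navigating the bfq_group hierarchy
 * associated with a request queue.
 */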
static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
{
	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;

	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	struct bfq_entity *group_entity = bfqq->entity.parent;

	return group_entity ? container_of(group_entity, struct bfq_group,
					   entity) :
			      bfqq->bfqd->root_group;
}

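/*
 * The following two functions handle get and put of a bfq_group by
 * wrapping the related blk-cgroup hooks.
 */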
static void bfqg_get(struct bfq_group *bfqg)
{
	bfqg->ref++;
}

static void bfqg_put(struct bfq_group *bfqg)
{
	bfqg->ref--;

	if (bfqg->ref == 0)
		kfree(bfqg);
}

static void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
	/* take a reference on bfqg itself, besides the one on its blkg */
	bfqg_get(bfqg);

	blkg_get(bfqg_to_blkg(bfqg));
}

void bfqg_and_blkg_put(struct bfq_group *bfqg)
{
	blkg_put(bfqg_to_blkg(bfqg));

	bfqg_put(bfqg);
}

void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
{
	struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);

	if (!bfqg)
		return;

	blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
	blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
}

/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats shouldn't be cleared */
	blkg_rwstat_reset(&stats->merged);
	blkg_rwstat_reset(&stats->service_time);
	blkg_rwstat_reset(&stats->wait_time);
	bfq_stat_reset(&stats->time);
	bfq_stat_reset(&stats->avg_queue_size_sum);
	bfq_stat_reset(&stats->avg_queue_size_samples);
	bfq_stat_reset(&stats->dequeue);
	bfq_stat_reset(&stats->group_wait_time);
	bfq_stat_reset(&stats->idle_time);
	bfq_stat_reset(&stats->empty_time);
#endif
}

/* @to += @from */
static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from)
{
	if (!to || !from)
		return;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	/* queued stats aren't transferred */
	blkg_rwstat_add_aux(&to->merged, &from->merged);
	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
	bfq_stat_add_aux(&to->time, &from->time);
	bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
	bfq_stat_add_aux(&to->avg_queue_size_samples,
			 &from->avg_queue_size_samples);
	bfq_stat_add_aux(&to->dequeue, &from->dequeue);
	bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
	bfq_stat_add_aux(&to->idle_time, &from->idle_time);
	bfq_stat_add_aux(&to->empty_time, &from->empty_time);
#endif
}

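/*
 * Transfer @bfqg's stats to its parent's aux counts so that the ancestors'
 * recursive stats do not lose the contribution of the group going away.
 * Called with the queue lock held (see the lockdep assertion below).
 */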
static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
{
	struct bfq_group *parent;

	if (!bfqg)
		return;

	parent = bfqg_parent(bfqg);

	lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock);

	if (unlikely(!parent))
		return;

	bfqg_stats_add_aux(&parent->stats, &bfqg->stats);
	bfqg_stats_reset(&bfqg->stats);
}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
		/*
		 * Make sure that bfqg and its associated blkg do not
		 * disappear before entity.
		 */
		bfqg_and_blkg_get(bfqg);
	}
	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
}

static void bfqg_stats_exit(struct bfqg_stats *stats)
{
	blkg_rwstat_exit(&stats->bytes);
	blkg_rwstat_exit(&stats->ios);
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	blkg_rwstat_exit(&stats->merged);
	blkg_rwstat_exit(&stats->service_time);
	blkg_rwstat_exit(&stats->wait_time);
	blkg_rwstat_exit(&stats->queued);
	bfq_stat_exit(&stats->time);
	bfq_stat_exit(&stats->avg_queue_size_sum);
	bfq_stat_exit(&stats->avg_queue_size_samples);
	bfq_stat_exit(&stats->dequeue);
	bfq_stat_exit(&stats->group_wait_time);
	bfq_stat_exit(&stats->idle_time);
	bfq_stat_exit(&stats->empty_time);
#endif
}

static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
	if (blkg_rwstat_init(&stats->bytes, gfp) ||
	    blkg_rwstat_init(&stats->ios, gfp))
		return -ENOMEM;

#ifdef CONFIG_BFQ_CGROUP_DEBUG
	if (blkg_rwstat_init(&stats->merged, gfp) ||
	    blkg_rwstat_init(&stats->service_time, gfp) ||
	    blkg_rwstat_init(&stats->wait_time, gfp) ||
	    blkg_rwstat_init(&stats->queued, gfp) ||
	    bfq_stat_init(&stats->time, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_sum, gfp) ||
	    bfq_stat_init(&stats->avg_queue_size_samples, gfp) ||
	    bfq_stat_init(&stats->dequeue, gfp) ||
	    bfq_stat_init(&stats->group_wait_time, gfp) ||
	    bfq_stat_init(&stats->idle_time, gfp) ||
	    bfq_stat_init(&stats->empty_time, gfp)) {
		bfqg_stats_exit(stats);
		return -ENOMEM;
	}
#endif

	return 0;
}

static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
{
	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
}

static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
{
	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
}

static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
{
	struct bfq_group_data *bgd;

	bgd = kzalloc(sizeof(*bgd), gfp);
	if (!bgd)
		return NULL;
	return &bgd->pd;
}

static void bfq_cpd_init(struct blkcg_policy_data *cpd)
{
	struct bfq_group_data *d = cpd_to_bfqgd(cpd);

	d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
		CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
}

static void bfq_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(cpd_to_bfqgd(cpd));
}

static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
					     struct blkcg *blkcg)
{
	struct bfq_group *bfqg;

	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
	if (!bfqg)
		return NULL;

	if (bfqg_stats_init(&bfqg->stats, gfp)) {
		kfree(bfqg);
		return NULL;
	}

	/* the pd holds a reference on bfqg; dropped in bfq_pd_free() */
	bfqg_get(bfqg);
	return &bfqg->pd;
}

static void bfq_pd_init(struct blkg_policy_data *pd)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
	struct bfq_entity *entity = &bfqg->entity;
	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);

	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
	entity->my_sched_data = &bfqg->sched_data;
	entity->last_bfqq_created = NULL;

	bfqg->my_entity = entity; /*
				   * the root group's my_entity is later
				   * reset to NULL by the scheduler init code
				   */
	bfqg->bfqd = bfqd;
	bfqg->active_entities = 0;
	bfqg->rq_pos_tree = RB_ROOT;
}

static void bfq_pd_free(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_exit(&bfqg->stats);
	bfqg_put(bfqg);
}

static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	bfqg_stats_reset(&bfqg->stats);
}

static void bfq_group_set_parent(struct bfq_group *bfqg,
				 struct bfq_group *parent)
{
	struct bfq_entity *entity;

	entity = &bfqg->entity;
	entity->parent = parent->my_entity;
	entity->sched_data = &parent->sched_data;
}

static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd,
					 struct blkcg *blkcg)
{
	struct blkcg_gq *blkg;

	blkg = blkg_lookup(blkcg, bfqd->queue);
	if (likely(blkg))
		return blkg_to_bfqg(blkg);
	return NULL;
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
				     struct blkcg *blkcg)
{
	struct bfq_group *bfqg, *parent;
	struct bfq_entity *entity;

	bfqg = bfq_lookup_bfqg(bfqd, blkcg);

	if (unlikely(!bfqg))
		return NULL;

	/*
	 * Update the chain of bfq_groups: the lookup may have returned a
	 * leaf group whose ancestors have not yet been hooked into the
	 * BFQ hierarchy, so connect every group on the path to the root.
	 */
	entity = &bfqg->entity;
	for_each_entity(entity) {
		struct bfq_group *curr_bfqg = container_of(entity,
			struct bfq_group, entity);
		if (curr_bfqg != bfqd->root_group) {
			parent = bfqg_parent(curr_bfqg);
			if (!parent)
				parent = bfqd->root_group;
			bfq_group_set_parent(curr_bfqg, parent);
		}
	}

	return bfqg;
}

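/*
 * bfq_bfqq_move - migrate @bfqq to @bfqg.
 * @bfqd: queue descriptor.
 * @bfqq: the queue to move.
 * @bfqg: the group to move to.
 *
 * Move @bfqq to @bfqg, deactivating it from its old group and reactivating
 * it on the new one, without putting the entity on the old group's idle
 * tree. Expected to be called with the scheduler lock (bfqd->lock) held.
 */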
void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg)
{
	struct bfq_entity *entity = &bfqq->entity;

	/*
	 * Get an extra reference, to prevent bfqq from being freed by
	 * the possible expire or deactivate below.
	 */
	bfqq->ref++;

	/*
	 * If bfqq is empty, then bfq_bfqq_expire also invokes
	 * bfq_del_bfqq_busy, thereby removing bfqq and its entity from the
	 * data structures of the current group. Otherwise bfqq is removed
	 * explicitly with bfq_deactivate_bfqq below.
	 */
	if (bfqq == bfqd->in_service_queue)
		bfq_bfqq_expire(bfqd, bfqd->in_service_queue,
				false, BFQQE_PREEMPTED);

	if (bfq_bfqq_busy(bfqq))
		bfq_deactivate_bfqq(bfqd, bfqq, false, false);
	else if (entity->on_st_or_in_serv)
		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
	bfqg_and_blkg_put(bfqq_group(bfqq));

	entity->parent = bfqg->my_entity;
	entity->sched_data = &bfqg->sched_data;
	/* pin down bfqg and its associated blkg */
	bfqg_and_blkg_get(bfqg);

	if (bfq_bfqq_busy(bfqq)) {
		if (unlikely(!bfqd->nonrot_with_queueing))
			bfq_pos_tree_add_move(bfqd, bfqq);
		bfq_activate_bfqq(bfqd, bfqq);
	}

	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
		bfq_schedule_dispatch(bfqd);
	/* release the extra reference taken above; bfqq may be freed here */
	bfq_put_queue(bfqq);
}

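/*
 * __bfq_bic_change_cgroup - move @bic to @blkcg.
 * @bfqd: the queue descriptor.
 * @bic: the bic to move.
 * @blkcg: the blk-cgroup to move to.
 *
 * Detach the bic's async queue from its old group and move its sync queue
 * to the group associated with @blkcg. Expected to be called with
 * bfqd->lock held, which keeps the cgroup reference valid across the call.
 */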
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
						 struct bfq_io_cq *bic,
						 struct blkcg *blkcg)
{
	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
	struct bfq_group *bfqg;
	struct bfq_entity *entity;

	bfqg = bfq_find_set_group(bfqd, blkcg);

	if (unlikely(!bfqg))
		bfqg = bfqd->root_group;

	if (async_bfqq) {
		entity = &async_bfqq->entity;

		if (entity->sched_data != &bfqg->sched_data) {
			bic_set_bfqq(bic, NULL, 0);
			bfq_release_process_ref(bfqd, async_bfqq);
		}
	}

	if (sync_bfqq) {
		entity = &sync_bfqq->entity;
		if (entity->sched_data != &bfqg->sched_data)
			bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
	}

	return bfqg;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
	struct bfq_data *bfqd = bic_to_bfqd(bic);
	struct bfq_group *bfqg = NULL;
	uint64_t serial_nr;

	rcu_read_lock();
	serial_nr = __bio_blkcg(bio)->css.serial_nr;

	/*
	 * Check whether blkcg has changed. The condition may trigger
	 * spuriously on a newly created bic but there's no harm.
	 */
	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
		goto out;

	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
	/*
	 * Cache the full blkg path in bfqg, so that the bfq_log_* helpers
	 * can print it without dereferencing the blkg, which may be
	 * destroyed asynchronously with respect to the scheduler code.
	 */
	blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
	bic->blkcg_serial_nr = serial_nr;
out:
	rcu_read_unlock();
}

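/*
 * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
 * @st: the service tree being flushed.
 */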
static void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
	struct bfq_entity *entity = st->first_idle;

	for (; entity ; entity = st->first_idle)
		__bfq_deactivate_entity(entity, false);
}

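/*
 * bfq_reparent_leaf_entity - move a leaf entity to the root group.
 * @bfqd: the device data structure with the root group.
 * @entity: the entity to move, if entity is a leaf; or the parent entity
 *	    of an active leaf entity to move, if entity is not a leaf.
 * @ioprio_class: I/O priority class of the service trees to walk.
 */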
static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
				     struct bfq_entity *entity,
				     int ioprio_class)
{
	struct bfq_queue *bfqq;
	struct bfq_entity *child_entity = entity;

	while (child_entity->my_sched_data) {
		struct bfq_sched_data *child_sd = child_entity->my_sched_data;
		struct bfq_service_tree *child_st = child_sd->service_tree +
			ioprio_class;
		struct rb_root *child_active = &child_st->active;

		child_entity = bfq_entity_of(rb_first(child_active));

		if (!child_entity)
			child_entity = child_sd->in_service_entity;
	}

	bfqq = bfq_entity_to_bfqq(child_entity);
	bfq_bfqq_move(bfqd, bfqq, bfqd->root_group);
}

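/*
 * bfq_reparent_active_queues - move to the root group all active queues.
 * @bfqd: the device data structure with the root group.
 * @bfqg: the group to move from.
 * @st: the service tree to start the search from.
 * @ioprio_class: I/O priority class of @st.
 */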
static void bfq_reparent_active_queues(struct bfq_data *bfqd,
				       struct bfq_group *bfqg,
				       struct bfq_service_tree *st,
				       int ioprio_class)
{
	struct rb_root *active = &st->active;
	struct bfq_entity *entity;

	while ((entity = bfq_entity_of(rb_first(active))))
		bfq_reparent_leaf_entity(bfqd, entity, ioprio_class);

	if (bfqg->sched_data.in_service_entity)
		bfq_reparent_leaf_entity(bfqd,
					 bfqg->sched_data.in_service_entity,
					 ioprio_class);
}

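/*
 * bfq_pd_offline - deactivate the entity associated with @pd and
 *		    reparent its child entities to the root group.
 * @pd: descriptor of the policy going offline.
 */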
static void bfq_pd_offline(struct blkg_policy_data *pd)
{
	struct bfq_service_tree *st;
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	struct bfq_data *bfqd = bfqg->bfqd;
	struct bfq_entity *entity = bfqg->my_entity;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&bfqd->lock, flags);

	if (!entity) /* root group */
		goto put_async_queues;

	/*
	 * Empty all service_trees belonging to this group before
	 * deactivating the group itself.
	 */
	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
		st = bfqg->sched_data.service_tree + i;

		/*
		 * Some queues may still be active upon group destruction,
		 * e.g. if the corresponding processes were forced to
		 * terminate: move their leaf entities, and a possible
		 * in-service entity, to the root group.
		 */
		bfq_reparent_active_queues(bfqd, bfqg, st, i);

		/*
		 * The idle tree may still contain bfq_queues that never
		 * migrated to the current cgroup from the one they were
		 * created in: deactivate them so that the group can go
		 * away safely.
		 */
		bfq_flush_idle_tree(st);
	}

	__bfq_deactivate_entity(entity, false);

put_async_queues:
	bfq_put_async_queues(bfqd, bfqg);

	spin_unlock_irqrestore(&bfqd->lock, flags);
	/*
	 * @blkg is going offline and will be ignored by
	 * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so
	 * that they don't get lost.
	 */
	bfqg_stats_xfer_dead(bfqg);
}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	struct blkcg_gq *blkg;

	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		bfq_end_wr_async_queues(bfqd, bfqg);
	}
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	unsigned int val = 0;

	if (bfqgd)
		val = bfqgd->weight;

	seq_printf(sf, "%u\n", val);

	return 0;
}

static u64 bfqg_prfill_weight_device(struct seq_file *sf,
				     struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);

	if (!bfqg->entity.dev_weight)
		return 0;
	return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
}

static int bfq_io_show_weight(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);

	seq_printf(sf, "default %u\n", bfqgd->weight);
	blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
			  &blkcg_policy_bfq, 0, false);
	return 0;
}

static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight)
{
	weight = dev_weight ?: weight;

	bfqg->entity.dev_weight = dev_weight;
	/*
	 * Setting the prio_changed flag of the entity with
	 * new_weight == weight would re-set the weight to its ioprio
	 * mapping. Set the flag only if the weight actually changes.
	 */
	if ((unsigned short)weight != bfqg->entity.new_weight) {
		bfqg->entity.new_weight = (unsigned short)weight;
		/*
		 * Make sure that the new value of new_weight is stored
		 * before the prio_changed flag is set: the flag may be
		 * read asynchronously, and any such reader that sees the
		 * flag must also see the new weight.
		 */
		smp_wmb();
		bfqg->entity.prio_changed = 1;
	}
}

static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css,
				    struct cftype *cftype,
				    u64 val)
{
	struct blkcg *blkcg = css_to_blkcg(css);
	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
	struct blkcg_gq *blkg;
	int ret = -ERANGE;

	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
		return ret;

	ret = 0;
	spin_lock_irq(&blkcg->lock);
	bfqgd->weight = (unsigned short)val;
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		if (bfqg)
			bfq_group_set_weight(bfqg, val, 0);
	}
	spin_unlock_irq(&blkcg->lock);

	return ret;
}

static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	int ret;
	struct blkg_conf_ctx ctx;
	struct blkcg *blkcg = css_to_blkcg(of_css(of));
	struct bfq_group *bfqg;
	u64 v;

	ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
	if (ret)
		return ret;

	if (sscanf(ctx.body, "%llu", &v) == 1) {
		/* a numeric 0 is not allowed; "default" must be used instead */
		ret = -ERANGE;
		if (!v)
			goto out;
	} else if (!strcmp(strim(ctx.body), "default")) {
		v = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	bfqg = blkg_to_bfqg(ctx.blkg);

	ret = -ERANGE;
	if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
		bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
		ret = 0;
	}
out:
	blkg_conf_finish(&ctx);
	return ret ?: nbytes;
}

static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
				 char *buf, size_t nbytes,
				 loff_t off)
{
	char *endp;
	int ret;
	u64 v;

	buf = strim(buf);

	/* "WEIGHT" or "default WEIGHT" sets the default weight */
	v = simple_strtoull(buf, &endp, 0);
	if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) {
		ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
		return ret ?: nbytes;
	}

	return bfq_io_set_device_weight(of, buf, nbytes, off);
}

static int bfqg_print_rwstat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
	return 0;
}

static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
					struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample sum;

	blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
	return __blkg_prfill_rwstat(sf, pd, &sum);
}

static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, true);
	return 0;
}

#ifdef CONFIG_BFQ_CGROUP_DEBUG
static int bfqg_print_stat(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct blkcg_gq *blkg = pd_to_blkg(pd);
	struct blkcg_gq *pos_blkg;
	struct cgroup_subsys_state *pos_css;
	u64 sum = 0;

	lockdep_assert_held(&blkg->q->queue_lock);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
		struct bfq_stat *stat;

		if (!pos_blkg->online)
			continue;

		stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
		sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
	}
	rcu_read_unlock();

	return __blkg_prfill_u64(sf, pd, sum);
}

static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
			  seq_cft(sf)->private, false);
	return 0;
}

static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
			       int off)
{
	struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
	u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);

	return __blkg_prfill_u64(sf, pd, sum >> 9);
}

static int bfqg_print_stat_sectors(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false);
	return 0;
}

static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
					 struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat_sample tmp;

	blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
			offsetof(struct bfq_group, stats.bytes), &tmp);

	return __blkg_prfill_u64(sf, pd,
		(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
}

static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0,
			  false);
	return 0;
}

static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	struct bfq_group *bfqg = pd_to_bfqg(pd);
	u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum);
		v = div64_u64(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
			  0, false);
	return 0;
}
#endif /* CONFIG_BFQ_CGROUP_DEBUG */

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	int ret;

	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
	if (ret)
		return NULL;

	return blkg_to_bfqg(bfqd->queue->root_blkg);
}

struct blkcg_policy blkcg_policy_bfq = {
	.dfl_cftypes		= bfq_blkg_files,
	.legacy_cftypes		= bfq_blkcg_legacy_files,

	.cpd_alloc_fn		= bfq_cpd_alloc,
	.cpd_init_fn		= bfq_cpd_init,
	.cpd_bind_fn		= bfq_cpd_init,
	.cpd_free_fn		= bfq_cpd_free,

	.pd_alloc_fn		= bfq_pd_alloc,
	.pd_init_fn		= bfq_pd_init,
	.pd_offline_fn		= bfq_pd_offline,
	.pd_free_fn		= bfq_pd_free,
	.pd_reset_stats_fn	= bfq_pd_reset_stats,
};

struct cftype bfq_blkcg_legacy_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight_legacy,
		.write_u64 = bfq_io_set_weight_legacy,
	},
	{
		.name = "bfq.weight_device",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},

	/* statistics, covering only the tasks in the bfqg */
	{
		.name = "bfq.io_service_bytes",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_serviced",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.sectors",
		.seq_show = bfqg_print_stat_sectors,
	},
	{
		.name = "bfq.io_service_time",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_wait_time",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_merged",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat,
	},
	{
		.name = "bfq.io_queued",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat,
	},
#endif

	/* the same statistics, but covering the bfqg and its descendants */
	{
		.name = "bfq.io_service_bytes_recursive",
		.private = offsetof(struct bfq_group, stats.bytes),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_serviced_recursive",
		.private = offsetof(struct bfq_group, stats.ios),
		.seq_show = bfqg_print_rwstat_recursive,
	},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
	{
		.name = "bfq.time_recursive",
		.private = offsetof(struct bfq_group, stats.time),
		.seq_show = bfqg_print_stat_recursive,
	},
	{
		.name = "bfq.sectors_recursive",
		.seq_show = bfqg_print_stat_sectors_recursive,
	},
	{
		.name = "bfq.io_service_time_recursive",
		.private = offsetof(struct bfq_group, stats.service_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_wait_time_recursive",
		.private = offsetof(struct bfq_group, stats.wait_time),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_merged_recursive",
		.private = offsetof(struct bfq_group, stats.merged),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.io_queued_recursive",
		.private = offsetof(struct bfq_group, stats.queued),
		.seq_show = bfqg_print_rwstat_recursive,
	},
	{
		.name = "bfq.avg_queue_size",
		.seq_show = bfqg_print_avg_queue_size,
	},
	{
		.name = "bfq.group_wait_time",
		.private = offsetof(struct bfq_group, stats.group_wait_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.idle_time",
		.private = offsetof(struct bfq_group, stats.idle_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.empty_time",
		.private = offsetof(struct bfq_group, stats.empty_time),
		.seq_show = bfqg_print_stat,
	},
	{
		.name = "bfq.dequeue",
		.private = offsetof(struct bfq_group, stats.dequeue),
		.seq_show = bfqg_print_stat,
	},
#endif
	{ }	/* terminate */
};

struct cftype bfq_blkg_files[] = {
	{
		.name = "bfq.weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = bfq_io_show_weight,
		.write = bfq_io_set_weight,
	},
	{ }	/* terminate */
};

#else	/* CONFIG_BFQ_GROUP_IOSCHED */

void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		   struct bfq_group *bfqg) {}

void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
{
	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);

	entity->weight = entity->new_weight;
	entity->orig_weight = entity->new_weight;
	if (bfqq) {
		bfqq->ioprio = bfqq->new_ioprio;
		bfqq->ioprio_class = bfqq->new_ioprio_class;
	}
	entity->sched_data = &bfqg->sched_data;
}

void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {}

void bfq_end_wr_async(struct bfq_data *bfqd)
{
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}

struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg)
{
	return bfqd->root_group;
}

struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
{
	return bfqq->bfqd->root_group;
}

void bfqg_and_blkg_get(struct bfq_group *bfqg) {}

void bfqg_and_blkg_put(struct bfq_group *bfqg) {}

struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
	struct bfq_group *bfqg;
	int i;

	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
	if (!bfqg)
		return NULL;

	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;

	return bfqg;
}
#endif	/* CONFIG_BFQ_GROUP_IOSCHED */