/*
 * Common Block IO controller cgroup interface
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"
#include "blk.h"

#define MAX_KEY_LEN 100

static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
			    .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
EXPORT_SYMBOL_GPL(blkcg_root);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];

static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_cgrp: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
					      (p_blkg)->q, false)))
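
/*
 * Usage sketch (illustrative, not part of the original file): with RCU
 * read lock held (plus queue or blkcg lock for online guarantees), a
 * policy could aggregate a per-blkg value over the online descendants of
 * @blkg.  "to_foo_pd()" and "nr_queued" below are hypothetical
 * policy-private names:
 *
 *	struct blkcg_gq *pos_blkg;
 *	struct cgroup *pos_cgrp;
 *	u64 total = 0;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, blkg)
 *		if (pos_blkg->online)
 *			total += to_foo_pd(blkg_to_pd(pos_blkg, pol))->nr_queued;
 *	rcu_read_unlock();
 *
 * blkg_stat_recursive_sum() and blkg_rwstat_recursive_sum() below are the
 * in-tree users of this iterator.
 */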

static bool blkcg_policy_enabled(struct request_queue *q,
				 const struct blkcg_policy *pol)
{
	return pol && test_bit(pol->plid, q->blkcg_pols);
}

/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkcg_gq *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd = blkg->pd[i];

		if (!pd)
			continue;

		if (pol && pol->pd_exit_fn)
			pol->pd_exit_fn(blkg);

		kfree(pd);
	}

	blk_exit_rl(&blkg->rl);
	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 * @gfp_mask: allocation mask to use
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
				   gfp_t gfp_mask)
{
	struct blkcg_gq *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;

	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
	if (blkcg != &blkcg_root) {
		if (blk_init_rl(&blkg->rl, q, gfp_mask))
			goto err_free;
		blkg->rl.blkg = blkg;
	}

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkg_policy_data *pd;

		if (!blkcg_policy_enabled(q, pol))
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
		if (!pd)
			goto err_free;

		blkg->pd[i] = pd;
		pd->blkg = blkg;
		pd->plid = i;

		/* invoke per-policy init */
		if (pol->pd_init_fn)
			pol->pd_init_fn(blkg);
	}

	return blkg;

err_free:
	blkg_free(blkg);
	return NULL;
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint)
{
	struct blkcg_gq *blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	/*
	 * Hint didn't match.  Look up from the radix tree.  Note that the
	 * hint can only be updated under queue_lock as otherwise @blkg
	 * could have already been removed from blkg_tree.  The caller is
	 * responsible for grabbing queue_lock if @update_hint.
	 */
	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
	if (blkg && blkg->q == q) {
		if (update_hint) {
			lockdep_assert_held(q->queue_lock);
			rcu_assign_pointer(blkcg->blkg_hint, blkg);
		}
		return blkg;
	}

	return NULL;
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock and is guaranteed to return %NULL if @q is
 * bypassing - see blk_queue_bypass_start() for details.
 */
struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}
EXPORT_SYMBOL_GPL(blkg_lookup);
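
/*
 * Usage sketch (illustrative, not part of the original file): a policy
 * looking up the blkg for a bio's blkcg in the issue path typically does
 * so under RCU; error handling and the create-on-miss fallback are
 * elided here:
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(bio_blkcg(bio), q);
 *	if (blkg)
 *		... account @bio against @blkg ...
 *	rcu_read_unlock();
 */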

/*
 * If @new_blkg is %NULL, this function tries to allocate a new one as
 * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
 */
static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
				    struct request_queue *q,
				    struct blkcg_gq *new_blkg)
{
	struct blkcg_gq *blkg;
	int i, ret;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css)) {
		ret = -EINVAL;
		goto err_free_blkg;
	}

	/* allocate */
	if (!new_blkg) {
		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
		if (unlikely(!new_blkg)) {
			ret = -ENOMEM;
			goto err_put_css;
		}
	}
	blkg = new_blkg;

	/* link parent */
	if (blkcg_parent(blkcg)) {
		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
		if (WARN_ON_ONCE(!blkg->parent)) {
			blkg = ERR_PTR(-EINVAL);
			goto err_put_css;
		}
		blkg_get(blkg->parent);
	}

	/* insert */
	spin_lock(&blkcg->lock);
	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
	if (likely(!ret)) {
		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
		list_add(&blkg->q_node, &q->blkg_list);

		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkg->pd[i] && pol->pd_online_fn)
				pol->pd_online_fn(blkg);
		}
	}
	blkg->online = true;
	spin_unlock(&blkcg->lock);

	if (!ret)
		return blkg;

	/* @blkg failed to be created, put the refs it has been holding */
	blkg_put(blkg);
	return ERR_PTR(ret);

err_put_css:
	css_put(&blkcg->css);
err_free_blkg:
	blkg_free(new_blkg);
	return ERR_PTR(ret);
}

/**
 * blkg_lookup_create - lookup blkg, try to create one if not there
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
 * create one.  blkg creation is performed recursively from blkcg_root such
 * that all non-root blkgs have access to the parent blkg.  This function
 * should be called under RCU read lock and @q->queue_lock.
 *
 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
 * value on error.  If @q is dying, returns ERR_PTR(-EINVAL).  If @q is not
 * dying but bypassing, returns ERR_PTR(-EBUSY).
 */
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q)
{
	struct blkcg_gq *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)))
		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);

	blkg = __blkg_lookup(blkcg, q, true);
	if (blkg)
		return blkg;

	/*
	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
	 * non-root blkgs have access to their parents.
	 */
	while (true) {
		struct blkcg *pos = blkcg;
		struct blkcg *parent = blkcg_parent(blkcg);

		while (parent && !__blkg_lookup(parent, q, false)) {
			pos = parent;
			parent = blkcg_parent(parent);
		}

		blkg = blkg_create(pos, q, NULL);
		if (pos == blkcg || IS_ERR(blkg))
			return blkg;
	}
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
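
/*
 * Usage sketch (illustrative, not part of the original file): callers
 * already holding both RCU read lock and @q->queue_lock, e.g. a
 * blk-throttle style policy, create the blkg on demand and typically fall
 * back to the root group if creation fails:
 *
 *	blkg = blkg_lookup_create(bio_blkcg(bio), q);
 *	if (IS_ERR(blkg))
 *		blkg = q->root_blkg;
 *	... use @blkg ...
 */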

static void blkg_destroy(struct blkcg_gq *blkg)
{
	struct blkcg *blkcg = blkg->blkcg;
	int i;

	lockdep_assert_held(blkg->q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));

	for (i = 0; i < BLKCG_MAX_POLS; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];

		if (blkg->pd[i] && pol->pd_offline_fn)
			pol->pd_offline_fn(blkg);
	}
	blkg->online = false;

	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	/*
	 * Both setting the lookup hint to and clearing it from @blkg are
	 * done under queue_lock.  If it's not pointing to @blkg now, it
	 * never will.  Hint assignment itself can race safely.
	 */
	if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
		rcu_assign_pointer(blkcg->blkg_hint, NULL);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, the group can be destroyed.
	 */
	blkg_put(blkg);
}

/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 *
 * Destroy all blkgs associated with @q.
 */
static void blkg_destroy_all(struct request_queue *q)
{
	struct blkcg_gq *blkg, *n;

	lockdep_assert_held(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	/*
	 * root blkg is destroyed.  Just clear the pointers since root_rl
	 * does not take a reference on the root blkg.
	 */
	q->root_blkg = NULL;
	q->root_rl.blkg = NULL;
}

static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
}

void __blkg_release(struct blkcg_gq *blkg)
{
	/* release the blkcg and parent blkg refs this blkg has been holding */
	css_put(&blkg->blkcg->css);
	if (blkg->parent)
		blkg_put(blkg->parent);

	/*
	 * A group is RCU protected, but having an rcu lock does not mean
	 * that one can access all the fields of blkg and assume these are
	 * valid.  For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under an rcu allows accesses to only
	 * values local to groups like group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);

/*
 * The next function used by blk_queue_for_each_rl().  It's a bit tricky
 * because the root blkg uses @q->root_rl instead of its own rl.
 */
struct request_list *__blk_queue_next_rl(struct request_list *rl,
					 struct request_queue *q)
{
	struct list_head *ent;
	struct blkcg_gq *blkg;

	/*
	 * Determine the current blkg list_head.  The first entry is
	 * root_rl which is off @q->blkg_list and mapped to the head.
	 */
	if (rl == &q->root_rl) {
		ent = &q->blkg_list;
		/* There are no more block groups, hence no request lists */
		if (list_empty(ent))
			return NULL;
	} else {
		blkg = container_of(rl, struct blkcg_gq, rl);
		ent = &blkg->q_node;
	}

	/* walk to the next list_head, skip root blkcg */
	ent = ent->next;
	if (ent == &q->root_blkg->q_node)
		ent = ent->next;
	if (ent == &q->blkg_list)
		return NULL;

	blkg = container_of(ent, struct blkcg_gq, q_node);
	return &blkg->rl;
}
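
/*
 * Callers don't use __blk_queue_next_rl() directly; it backs the
 * blk_queue_for_each_rl() iterator declared in blkdev.h.  Illustrative
 * sketch (not part of the original file) of how the block core walks
 * every request_list of a queue, e.g. to wake up waiters while draining:
 *
 *	struct request_list *rl;
 *
 *	blk_queue_for_each_rl(rl, q) {
 *		wake_up_all(&rl->wait[BLK_RW_SYNC]);
 *		wake_up_all(&rl->wait[BLK_RW_ASYNC]);
 *	}
 */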

static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
			     u64 val)
{
	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
	struct blkcg_gq *blkg;
	int i;

	mutex_lock(&blkcg_pol_mutex);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
		for (i = 0; i < BLKCG_MAX_POLS; i++) {
			struct blkcg_policy *pol = blkcg_policy[i];

			if (blkcg_policy_enabled(blkg->q, pol) &&
			    pol->pd_reset_stats_fn)
				pol->pd_reset_stats_fn(blkg);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	mutex_unlock(&blkcg_pol_mutex);
	return 0;
}

static const char *blkg_dev_name(struct blkcg_gq *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: to print out sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total)
{
	struct blkcg_gq *blkg;
	u64 total = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
		spin_lock_irq(blkg->q->queue_lock);
		if (blkcg_policy_enabled(blkg->q, pol))
			total += prfill(sf, blkg->pd[pol->plid], data);
		spin_unlock_irq(blkg->q->queue_lock);
	}
	rcu_read_unlock();

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
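
/*
 * Usage sketch (illustrative, not part of the original file): a policy
 * wires this up as a cftype read_seq_string method.  The policy name
 * "foo", its blkcg_policy_foo descriptor and the foo_prfill_bar() helper
 * (built on __blkg_prfill_u64() or blkg_prfill_stat()) are hypothetical:
 *
 *	static int foo_print_bar(struct cgroup *cgrp, struct cftype *cft,
 *				 struct seq_file *sf)
 *	{
 *		blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), foo_prfill_bar,
 *				  &blkcg_policy_foo, 0, false);
 *		return 0;
 *	}
 */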

/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);

/**
 * blkg_prfill_stat - prfill callback for blkg_stat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the blkg_stat in @pd
 *
 * prfill callback for printing a blkg_stat.
 */
u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off)
{
	return __blkg_prfill_u64(sf, pd, blkg_stat_read((void *)pd + off));
}
EXPORT_SYMBOL_GPL(blkg_prfill_stat);

/**
 * blkg_prfill_rwstat - prfill callback for blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy private data of interest
 * @off: offset to the blkg_rwstat in @pd
 *
 * prfill callback for printing a blkg_rwstat.
 */
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);

/**
 * blkg_stat_recursive_sum - collect hierarchical blkg_stat
 * @pd: policy private data of interest
 * @off: offset to the blkg_stat in @pd
 *
 * Collect the blkg_stat specified by @off from @pd and all its online
 * descendants and return the sum.  The caller must be holding the queue
 * lock for online tests.
 */
u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
{
	struct blkcg_policy *pol = blkcg_policy[pd->plid];
	struct blkcg_gq *pos_blkg;
	struct cgroup *pos_cgrp;
	u64 sum;

	lockdep_assert_held(pd->blkg->q->queue_lock);

	sum = blkg_stat_read((void *)pd + off);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
		struct blkg_stat *stat = (void *)pos_pd + off;

		if (pos_blkg->online)
			sum += blkg_stat_read(stat);
	}
	rcu_read_unlock();

	return sum;
}
EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
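
/*
 * Usage sketch (illustrative, not part of the original file): recursive
 * stats are usually exposed by pairing this helper with a prfill
 * callback.  The policy data layout embedding a "struct blkg_stat" is
 * hypothetical:
 *
 *	static u64 foo_prfill_time_recursive(struct seq_file *sf,
 *					     struct blkg_policy_data *pd,
 *					     int off)
 *	{
 *		u64 sum = blkg_stat_recursive_sum(pd, off);
 *
 *		return __blkg_prfill_u64(sf, pd, sum);
 *	}
 *
 * invoked via blkcg_print_blkgs() with @data set to the offset of the
 * blkg_stat inside the policy data.
 */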

/**
 * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
 * @pd: policy private data of interest
 * @off: offset to the blkg_rwstat in @pd
 *
 * Collect the blkg_rwstat specified by @off from @pd and all its online
 * descendants and return the sum.  The caller must be holding the queue
 * lock for online tests.
 */
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
					     int off)
{
	struct blkcg_policy *pol = blkcg_policy[pd->plid];
	struct blkcg_gq *pos_blkg;
	struct cgroup *pos_cgrp;
	struct blkg_rwstat sum;
	int i;

	lockdep_assert_held(pd->blkg->q->queue_lock);

	sum = blkg_rwstat_read((void *)pd + off);

	rcu_read_lock();
	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
		struct blkg_rwstat *rwstat = (void *)pos_pd + off;
		struct blkg_rwstat tmp;

		if (!pos_blkg->online)
			continue;

		tmp = blkg_rwstat_read(rwstat);

		for (i = 0; i < BLKG_RWSTAT_NR; i++)
			sum.cnt[i] += tmp.cnt[i];
	}
	rcu_read_unlock();

	return sum;
}
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);

/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @pol: target policy
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read lock and queue lock held and
 * must be paired with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   const char *input, struct blkg_conf_ctx *ctx)
	__acquires(rcu) __acquires(disk->queue->queue_lock)
{
	struct gendisk *disk;
	struct blkcg_gq *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk)
		return -EINVAL;
	if (part) {
		put_disk(disk);
		return -EINVAL;
	}

	rcu_read_lock();
	spin_lock_irq(disk->queue->queue_lock);

	if (blkcg_policy_enabled(disk->queue, pol))
		blkg = blkg_lookup_create(blkcg, disk->queue);
	else
		blkg = ERR_PTR(-EINVAL);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		spin_unlock_irq(disk->queue->queue_lock);
		put_disk(disk);
		/*
		 * If queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but queue
		 * can be bypassing for some time and it's always nice to
		 * avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(ctx->disk->queue->queue_lock) __releases(rcu)
{
	spin_unlock_irq(ctx->disk->queue->queue_lock);
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
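
/*
 * Usage sketch (illustrative, not part of the original file): a cftype
 * write handler accepting "MAJ:MIN VAL" strings brackets the update with
 * blkg_conf_prep()/blkg_conf_finish().  "blkcg_policy_foo", "foo_grp",
 * blkg_to_foo() and the "limit" field are hypothetical:
 *
 *	static int foo_set_limit(struct cgroup *cgrp, struct cftype *cft,
 *				 const char *buf)
 *	{
 *		struct blkcg *blkcg = cgroup_to_blkcg(cgrp);
 *		struct blkg_conf_ctx ctx;
 *		struct foo_grp *fg;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, &blkcg_policy_foo, buf, &ctx);
 *		if (ret)
 *			return ret;
 *
 *		fg = blkg_to_foo(ctx.blkg);
 *		fg->limit = ctx.v;
 *
 *		blkg_conf_finish(&ctx);
 *		return 0;
 *	}
 */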

struct cftype blkcg_files[] = {
	{
		.name = "reset_stats",
		.write_u64 = blkcg_reset_stats,
	},
	{ }	/* terminate */
};

/**
 * blkcg_css_offline - cgroup css_offline callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and responsible
 * for shooting down all blkgs associated with @cgroup.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static void blkcg_css_offline(struct cgroup *cgroup)
{
	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
						    struct blkcg_gq, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
}

static void blkcg_css_free(struct cgroup *cgroup)
{
	struct blkcg *blkcg = cgroup_to_blkcg(cgroup);

	if (blkcg != &blkcg_root)
		kfree(blkcg);
}

static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
{
	static atomic64_t id_seq = ATOMIC64_INIT(0);
	struct blkcg *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkcg_root;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
	blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
	spin_lock_init(&blkcg->lock);
	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}

/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node().  Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	might_sleep();

	return blk_throtl_init(q);
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	/*
	 * @q could be exiting and already have destroyed all blkgs as
	 * indicated by NULL root_blkg.  If so, there's nothing to drain.
	 */
	if (!q->root_blkg)
		return;

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	spin_lock_irq(q->queue_lock);
	blkg_destroy_all(q);
	spin_unlock_irq(q->queue_lock);

	blk_throtl_exit(q);
}

/*
 * We cannot support shared io contexts, as we have no mean to support
 * two tasks with the same ioc in two different groups without major
 * rework of the main cic data structures.  For now we allow a task to
 * change its cgroup only if it's the only owner of its ioc.
 */
static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.css_alloc = blkcg_css_alloc,
	.css_offline = blkcg_css_offline,
	.css_free = blkcg_css_free,
	.can_attach = blkcg_can_attach,
	.subsys_id = blkio_subsys_id,
	.base_cftypes = blkcg_files,
	.module = THIS_MODULE,

	/*
	 * blkio subsystem is utterly broken in terms of hierarchy support.
	 * It treats all cgroups equally regardless of where they're
	 * located in the hierarchy - all cgroups are treated as if they're
	 * right below the root.  Fix it and remove the following.
	 */
	.broken_hierarchy = true,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

/**
 * blkcg_activate_policy - activate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to activate
 *
 * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
 * bypass mode to populate its blkgs with policy_data for @pol.
 *
 * Activation happens with @q bypassed, so nobody would be accessing blkgs
 * while policies are being activated.  Pair with blkcg_deactivate_policy()
 * to undo.
 *
 * RETURNS:
 * 0 on success, -errno on failure.  On success, all blkgs of @q have
 * policy data for @pol installed.
 */
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol)
{
	LIST_HEAD(pds);
	struct blkcg_gq *blkg, *new_blkg;
	struct blkg_policy_data *pd, *n;
	int cnt = 0, ret;
	bool preloaded;

	if (blkcg_policy_enabled(q, pol))
		return 0;

	/* preallocations for root blkg */
	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
	if (!new_blkg)
		return -ENOMEM;

	blk_queue_bypass_start(q);

	preloaded = !radix_tree_preload(GFP_KERNEL);

	/*
	 * Make sure the root blkg exists and count the existing blkgs.  As
	 * @q is bypassing at this point, blkg_lookup_create() can't be
	 * used.  Open code it.
	 */
	spin_lock_irq(q->queue_lock);

	rcu_read_lock();
	blkg = __blkg_lookup(&blkcg_root, q, false);
	if (blkg)
		blkg_free(new_blkg);
	else
		blkg = blkg_create(&blkcg_root, q, new_blkg);
	rcu_read_unlock();

	if (preloaded)
		radix_tree_preload_end();

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		goto out_unlock;
	}
	q->root_blkg = blkg;
	q->root_rl.blkg = blkg;

	list_for_each_entry(blkg, &q->blkg_list, q_node)
		cnt++;

	spin_unlock_irq(q->queue_lock);

	/* allocate policy_data for all existing blkgs */
	while (cnt--) {
		pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
		if (!pd) {
			ret = -ENOMEM;
			goto out_free;
		}
		list_add_tail(&pd->alloc_node, &pds);
	}

	/*
	 * Install the allocated pds.  With @q bypassing, no new blkg
	 * should have been created while the queue lock was dropped.
	 */
	spin_lock_irq(q->queue_lock);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		if (WARN_ON(list_empty(&pds))) {
			/* umm... this shouldn't happen, just abort */
			ret = -ENOMEM;
			goto out_unlock;
		}
		pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
		list_del_init(&pd->alloc_node);

		/* grab blkcg lock too while installing @pd on @blkg */
		spin_lock(&blkg->blkcg->lock);

		blkg->pd[pol->plid] = pd;
		pd->blkg = blkg;
		pd->plid = pol->plid;
		pol->pd_init_fn(blkg);

		spin_unlock(&blkg->blkcg->lock);
	}

	__set_bit(pol->plid, q->blkcg_pols);
	ret = 0;
out_unlock:
	spin_unlock_irq(q->queue_lock);
out_free:
	blk_queue_bypass_end(q);
	list_for_each_entry_safe(pd, n, &pds, alloc_node)
		kfree(pd);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
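
/*
 * Usage sketch (illustrative, not part of the original file): policies
 * activate themselves per queue from their init path and undo it on
 * exit; error handling abbreviated and "blkcg_policy_foo" hypothetical:
 *
 *	ret = blkcg_activate_policy(q, &blkcg_policy_foo);
 *	if (ret)
 *		return ret;
 *	...
 *	blkcg_deactivate_policy(q, &blkcg_policy_foo);
 */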

/**
 * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
 * @q: request_queue of interest
 * @pol: blkcg policy to deactivate
 *
 * Deactivate @pol on @q.  Follows the same synchronization rules as
 * blkcg_activate_policy().
 */
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol)
{
	struct blkcg_gq *blkg;

	if (!blkcg_policy_enabled(q, pol))
		return;

	blk_queue_bypass_start(q);
	spin_lock_irq(q->queue_lock);

	__clear_bit(pol->plid, q->blkcg_pols);

	/* if no policy is left, no need for blkgs - shoot them down */
	if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
		blkg_destroy_all(q);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		/* grab blkcg lock too while removing @pd from @blkg */
		spin_lock(&blkg->blkcg->lock);

		if (pol->pd_offline_fn)
			pol->pd_offline_fn(blkg);
		if (pol->pd_exit_fn)
			pol->pd_exit_fn(blkg);

		kfree(blkg->pd[pol->plid]);
		blkg->pd[pol->plid] = NULL;

		spin_unlock(&blkg->blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
	blk_queue_bypass_end(q);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);

/**
 * blkcg_policy_register - register a blkcg policy
 * @pol: blkcg policy to register
 *
 * Register @pol with blkcg core.  Might sleep and @pol may be modified on
 * successful registration.  Returns 0 on success and -errno on failure.
 */
int blkcg_policy_register(struct blkcg_policy *pol)
{
	int i, ret;

	if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
		return -EINVAL;

	mutex_lock(&blkcg_pol_mutex);

	/* find an empty slot */
	ret = -ENOSPC;
	for (i = 0; i < BLKCG_MAX_POLS; i++)
		if (!blkcg_policy[i])
			break;
	if (i >= BLKCG_MAX_POLS)
		goto out_unlock;

	/* register the policy */
	pol->plid = i;
	blkcg_policy[i] = pol;

	/* everything is in place, create the interface files */
	if (pol->cftypes)
		WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes));
	ret = 0;
out_unlock:
	mutex_unlock(&blkcg_pol_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_policy_register);
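
/*
 * Usage sketch (illustrative, not part of the original file): a policy
 * declares its blkcg_policy descriptor and registers it at init time.
 * All "foo" names are hypothetical:
 *
 *	static struct blkcg_policy blkcg_policy_foo = {
 *		.pd_size	= sizeof(struct foo_grp),
 *		.cftypes	= foo_files,
 *		.pd_init_fn	= foo_pd_init,
 *		.pd_exit_fn	= foo_pd_exit,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return blkcg_policy_register(&blkcg_policy_foo);
 *	}
 *
 * paired with blkcg_policy_unregister() from the module exit path.
 */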

/**
 * blkcg_policy_unregister - unregister a blkcg policy
 * @pol: blkcg policy to unregister
 *
 * Undo blkcg_policy_register(@pol).  Might sleep.
 */
void blkcg_policy_unregister(struct blkcg_policy *pol)
{
	mutex_lock(&blkcg_pol_mutex);

	if (WARN_ON(blkcg_policy[pol->plid] != pol))
		goto out_unlock;

	/* kill the intf files first */
	if (pol->cftypes)
		cgroup_rm_cftypes(&blkio_subsys, pol->cftypes);

	/* unregister the policy */
	blkcg_policy[pol->plid] = NULL;
out_unlock:
	mutex_unlock(&blkcg_pol_mutex);
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);