1
2
3
4
5
6
7
8
9#include <linux/module.h>
10#include <linux/slab.h>
11#include <linux/blkdev.h>
12#include <linux/elevator.h>
13#include <linux/jiffies.h>
14#include <linux/rbtree.h>
15#include <linux/ioprio.h>
16#include <linux/blktrace_api.h>
17#include "cfq.h"
18
19
20
21
22
23static const int cfq_quantum = 8;
24static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
25
26static const int cfq_back_max = 16 * 1024;
27
28static const int cfq_back_penalty = 2;
29static const int cfq_slice_sync = HZ / 10;
30static int cfq_slice_async = HZ / 25;
31static const int cfq_slice_async_rq = 2;
32static int cfq_slice_idle = HZ / 125;
33static int cfq_group_idle = HZ / 125;
34static const int cfq_target_latency = HZ * 3/10;
35static const int cfq_hist_divisor = 4;
36
37
38
39
40#define CFQ_IDLE_DELAY (HZ / 5)
41
42
43
44
45#define CFQ_MIN_TT (2)
46
47#define CFQ_SLICE_SCALE (5)
48#define CFQ_HW_QUEUE_MIN (5)
49#define CFQ_SERVICE_SHIFT 12
50
51#define CFQQ_SEEK_THR (sector_t)(8 * 100)
52#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
53#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
54#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
55
56#define RQ_CIC(rq) \
57 ((struct cfq_io_context *) (rq)->elevator_private[0])
58#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private[1])
59#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private[2])
60
61static struct kmem_cache *cfq_pool;
62static struct kmem_cache *cfq_ioc_pool;
63
64static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
65static struct completion *ioc_gone;
66static DEFINE_SPINLOCK(ioc_gone_lock);
67
68static DEFINE_SPINLOCK(cic_index_lock);
69static DEFINE_IDA(cic_index_ida);
70
71#define CFQ_PRIO_LISTS IOPRIO_BE_NR
72#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
73#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
74
75#define sample_valid(samples) ((samples) > 80)
76#define rb_entry_cfqg(node) rb_entry((node), struct cfq_group, rb_node)
77
78
79
80
81
82
83
84struct cfq_rb_root {
85 struct rb_root rb;
86 struct rb_node *left;
87 unsigned count;
88 unsigned total_weight;
89 u64 min_vdisktime;
90 struct cfq_ttime ttime;
91};
92#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
93 .ttime = {.last_end_request = jiffies,},}
94
95
96
97
98struct cfq_queue {
99
100 int ref;
101
102 unsigned int flags;
103
104 struct cfq_data *cfqd;
105
106 struct rb_node rb_node;
107
108 unsigned long rb_key;
109
110 struct rb_node p_node;
111
112 struct rb_root *p_root;
113
114 struct rb_root sort_list;
115
116 struct request *next_rq;
117
118 int queued[2];
119
120 int allocated[2];
121
122 struct list_head fifo;
123
124
125 unsigned long dispatch_start;
126 unsigned int allocated_slice;
127 unsigned int slice_dispatch;
128
129 unsigned long slice_start;
130 unsigned long slice_end;
131 long slice_resid;
132
133
134 int prio_pending;
135
136 int dispatched;
137
138
139 unsigned short ioprio, org_ioprio;
140 unsigned short ioprio_class;
141
142 pid_t pid;
143
144 u32 seek_history;
145 sector_t last_request_pos;
146
147 struct cfq_rb_root *service_tree;
148 struct cfq_queue *new_cfqq;
149 struct cfq_group *cfqg;
150
151 unsigned long nr_sectors;
152};
153
154
155
156
157
158enum wl_prio_t {
159 BE_WORKLOAD = 0,
160 RT_WORKLOAD = 1,
161 IDLE_WORKLOAD = 2,
162 CFQ_PRIO_NR,
163};
164
165
166
167
168enum wl_type_t {
169 ASYNC_WORKLOAD = 0,
170 SYNC_NOIDLE_WORKLOAD = 1,
171 SYNC_WORKLOAD = 2
172};
173
174
175struct cfq_group {
176
177 struct rb_node rb_node;
178
179
180 u64 vdisktime;
181 unsigned int weight;
182 unsigned int new_weight;
183 bool needs_update;
184
185
186 int nr_cfqq;
187
188
189
190
191
192
193
194 unsigned int busy_queues_avg[CFQ_PRIO_NR];
195
196
197
198
199
200
201
202
203 struct cfq_rb_root service_trees[2][3];
204 struct cfq_rb_root service_tree_idle;
205
206 unsigned long saved_workload_slice;
207 enum wl_type_t saved_workload;
208 enum wl_prio_t saved_serving_prio;
209 struct blkio_group blkg;
210#ifdef CONFIG_CFQ_GROUP_IOSCHED
211 struct hlist_node cfqd_node;
212 int ref;
213#endif
214
215 int dispatched;
216 struct cfq_ttime ttime;
217};
218
219
220
221
222struct cfq_data {
223 struct request_queue *queue;
224
225 struct cfq_rb_root grp_service_tree;
226 struct cfq_group root_group;
227
228
229
230
231 enum wl_prio_t serving_prio;
232 enum wl_type_t serving_type;
233 unsigned long workload_expires;
234 struct cfq_group *serving_group;
235
236
237
238
239
240
241 struct rb_root prio_trees[CFQ_PRIO_LISTS];
242
243 unsigned int busy_queues;
244 unsigned int busy_sync_queues;
245
246 int rq_in_driver;
247 int rq_in_flight[2];
248
249
250
251
252 int rq_queued;
253 int hw_tag;
254
255
256
257
258
259
260 int hw_tag_est_depth;
261 unsigned int hw_tag_samples;
262
263
264
265
266 struct timer_list idle_slice_timer;
267 struct work_struct unplug_work;
268
269 struct cfq_queue *active_queue;
270 struct cfq_io_context *active_cic;
271
272
273
274
275 struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
276 struct cfq_queue *async_idle_cfqq;
277
278 sector_t last_position;
279
280
281
282
283 unsigned int cfq_quantum;
284 unsigned int cfq_fifo_expire[2];
285 unsigned int cfq_back_penalty;
286 unsigned int cfq_back_max;
287 unsigned int cfq_slice[2];
288 unsigned int cfq_slice_async_rq;
289 unsigned int cfq_slice_idle;
290 unsigned int cfq_group_idle;
291 unsigned int cfq_latency;
292
293 unsigned int cic_index;
294 struct list_head cic_list;
295
296
297
298
299 struct cfq_queue oom_cfqq;
300
301 unsigned long last_delayed_sync;
302
303
304 struct hlist_head cfqg_list;
305
306
307 unsigned int nr_blkcg_linked_grps;
308};
309
310static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
311
312static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
313 enum wl_prio_t prio,
314 enum wl_type_t type)
315{
316 if (!cfqg)
317 return NULL;
318
319 if (prio == IDLE_WORKLOAD)
320 return &cfqg->service_tree_idle;
321
322 return &cfqg->service_trees[prio][type];
323}
324
325enum cfqq_state_flags {
326 CFQ_CFQQ_FLAG_on_rr = 0,
327 CFQ_CFQQ_FLAG_wait_request,
328 CFQ_CFQQ_FLAG_must_dispatch,
329 CFQ_CFQQ_FLAG_must_alloc_slice,
330 CFQ_CFQQ_FLAG_fifo_expire,
331 CFQ_CFQQ_FLAG_idle_window,
332 CFQ_CFQQ_FLAG_prio_changed,
333 CFQ_CFQQ_FLAG_slice_new,
334 CFQ_CFQQ_FLAG_sync,
335 CFQ_CFQQ_FLAG_coop,
336 CFQ_CFQQ_FLAG_split_coop,
337 CFQ_CFQQ_FLAG_deep,
338 CFQ_CFQQ_FLAG_wait_busy,
339};
340
341#define CFQ_CFQQ_FNS(name) \
342static inline void cfq_mark_cfqq_##name(struct cfq_queue *cfqq) \
343{ \
344 (cfqq)->flags |= (1 << CFQ_CFQQ_FLAG_##name); \
345} \
346static inline void cfq_clear_cfqq_##name(struct cfq_queue *cfqq) \
347{ \
348 (cfqq)->flags &= ~(1 << CFQ_CFQQ_FLAG_##name); \
349} \
350static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
351{ \
352 return ((cfqq)->flags & (1 << CFQ_CFQQ_FLAG_##name)) != 0; \
353}
354
355CFQ_CFQQ_FNS(on_rr);
356CFQ_CFQQ_FNS(wait_request);
357CFQ_CFQQ_FNS(must_dispatch);
358CFQ_CFQQ_FNS(must_alloc_slice);
359CFQ_CFQQ_FNS(fifo_expire);
360CFQ_CFQQ_FNS(idle_window);
361CFQ_CFQQ_FNS(prio_changed);
362CFQ_CFQQ_FNS(slice_new);
363CFQ_CFQQ_FNS(sync);
364CFQ_CFQQ_FNS(coop);
365CFQ_CFQQ_FNS(split_coop);
366CFQ_CFQQ_FNS(deep);
367CFQ_CFQQ_FNS(wait_busy);
368#undef CFQ_CFQQ_FNS
369
370#ifdef CONFIG_CFQ_GROUP_IOSCHED
371#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
372 blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
373 cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
374 blkg_path(&(cfqq)->cfqg->blkg), ##args)
375
376#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) \
377 blk_add_trace_msg((cfqd)->queue, "%s " fmt, \
378 blkg_path(&(cfqg)->blkg), ##args) \
379
380#else
381#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
382 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
383#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
384#endif
385#define cfq_log(cfqd, fmt, args...) \
386 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
387
388
389#define for_each_cfqg_st(cfqg, i, j, st) \
390 for (i = 0; i <= IDLE_WORKLOAD; i++) \
391 for (j = 0, st = i < IDLE_WORKLOAD ? &cfqg->service_trees[i][j]\
392 : &cfqg->service_tree_idle; \
393 (i < IDLE_WORKLOAD && j <= SYNC_WORKLOAD) || \
394 (i == IDLE_WORKLOAD && j == 0); \
395 j++, st = i < IDLE_WORKLOAD ? \
396 &cfqg->service_trees[i][j]: NULL) \
397
398static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
399 struct cfq_ttime *ttime, bool group_idle)
400{
401 unsigned long slice;
402 if (!sample_valid(ttime->ttime_samples))
403 return false;
404 if (group_idle)
405 slice = cfqd->cfq_group_idle;
406 else
407 slice = cfqd->cfq_slice_idle;
408 return ttime->ttime_mean > slice;
409}
410
411static inline bool iops_mode(struct cfq_data *cfqd)
412{
413
414
415
416
417
418
419
420 if (!cfqd->cfq_slice_idle && cfqd->hw_tag)
421 return true;
422 else
423 return false;
424}
425
426static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq)
427{
428 if (cfq_class_idle(cfqq))
429 return IDLE_WORKLOAD;
430 if (cfq_class_rt(cfqq))
431 return RT_WORKLOAD;
432 return BE_WORKLOAD;
433}
434
435
436static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
437{
438 if (!cfq_cfqq_sync(cfqq))
439 return ASYNC_WORKLOAD;
440 if (!cfq_cfqq_idle_window(cfqq))
441 return SYNC_NOIDLE_WORKLOAD;
442 return SYNC_WORKLOAD;
443}
444
445static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
446 struct cfq_data *cfqd,
447 struct cfq_group *cfqg)
448{
449 if (wl == IDLE_WORKLOAD)
450 return cfqg->service_tree_idle.count;
451
452 return cfqg->service_trees[wl][ASYNC_WORKLOAD].count
453 + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count
454 + cfqg->service_trees[wl][SYNC_WORKLOAD].count;
455}
456
457static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
458 struct cfq_group *cfqg)
459{
460 return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count
461 + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count;
462}
463
464static void cfq_dispatch_insert(struct request_queue *, struct request *);
465static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
466 struct io_context *, gfp_t);
467static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
468 struct io_context *);
469
470static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
471 bool is_sync)
472{
473 return cic->cfqq[is_sync];
474}
475
476static inline void cic_set_cfqq(struct cfq_io_context *cic,
477 struct cfq_queue *cfqq, bool is_sync)
478{
479 cic->cfqq[is_sync] = cfqq;
480}
481
482#define CIC_DEAD_KEY 1ul
483#define CIC_DEAD_INDEX_SHIFT 1
484
485static inline void *cfqd_dead_key(struct cfq_data *cfqd)
486{
487 return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
488}
489
490static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
491{
492 struct cfq_data *cfqd = cic->key;
493
494 if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
495 return NULL;
496
497 return cfqd;
498}
499
500
501
502
503
504static inline bool cfq_bio_sync(struct bio *bio)
505{
506 return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC);
507}
508
509
510
511
512
513static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
514{
515 if (cfqd->busy_queues) {
516 cfq_log(cfqd, "schedule dispatch");
517 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
518 }
519}
520
521
522
523
524
525
526static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
527 unsigned short prio)
528{
529 const int base_slice = cfqd->cfq_slice[sync];
530
531 WARN_ON(prio >= IOPRIO_BE_NR);
532
533 return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
534}
535
536static inline int
537cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
538{
539 return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
540}
541
542static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg)
543{
544 u64 d = delta << CFQ_SERVICE_SHIFT;
545
546 d = d * BLKIO_WEIGHT_DEFAULT;
547 do_div(d, cfqg->weight);
548 return d;
549}
550
551static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
552{
553 s64 delta = (s64)(vdisktime - min_vdisktime);
554 if (delta > 0)
555 min_vdisktime = vdisktime;
556
557 return min_vdisktime;
558}
559
560static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime)
561{
562 s64 delta = (s64)(vdisktime - min_vdisktime);
563 if (delta < 0)
564 min_vdisktime = vdisktime;
565
566 return min_vdisktime;
567}
568
569static void update_min_vdisktime(struct cfq_rb_root *st)
570{
571 struct cfq_group *cfqg;
572
573 if (st->left) {
574 cfqg = rb_entry_cfqg(st->left);
575 st->min_vdisktime = max_vdisktime(st->min_vdisktime,
576 cfqg->vdisktime);
577 }
578}
579
580
581
582
583
584
585
586static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
587 struct cfq_group *cfqg, bool rt)
588{
589 unsigned min_q, max_q;
590 unsigned mult = cfq_hist_divisor - 1;
591 unsigned round = cfq_hist_divisor / 2;
592 unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
593
594 min_q = min(cfqg->busy_queues_avg[rt], busy);
595 max_q = max(cfqg->busy_queues_avg[rt], busy);
596 cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
597 cfq_hist_divisor;
598 return cfqg->busy_queues_avg[rt];
599}
600
601static inline unsigned
602cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
603{
604 struct cfq_rb_root *st = &cfqd->grp_service_tree;
605
606 return cfq_target_latency * cfqg->weight / st->total_weight;
607}
608
609static inline unsigned
610cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
611{
612 unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
613 if (cfqd->cfq_latency) {
614
615
616
617
618 unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
619 cfq_class_rt(cfqq));
620 unsigned sync_slice = cfqd->cfq_slice[1];
621 unsigned expect_latency = sync_slice * iq;
622 unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
623
624 if (expect_latency > group_slice) {
625 unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
626
627
628 unsigned low_slice =
629 min(slice, base_low_slice * slice / sync_slice);
630
631
632 slice = max(slice * group_slice / expect_latency,
633 low_slice);
634 }
635 }
636 return slice;
637}
638
639static inline void
640cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
641{
642 unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
643
644 cfqq->slice_start = jiffies;
645 cfqq->slice_end = jiffies + slice;
646 cfqq->allocated_slice = slice;
647 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
648}
649
650
651
652
653
654
655static inline bool cfq_slice_used(struct cfq_queue *cfqq)
656{
657 if (cfq_cfqq_slice_new(cfqq))
658 return false;
659 if (time_before(jiffies, cfqq->slice_end))
660 return false;
661
662 return true;
663}
664
665
666
667
668
669
670static struct request *
671cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, sector_t last)
672{
673 sector_t s1, s2, d1 = 0, d2 = 0;
674 unsigned long back_max;
675#define CFQ_RQ1_WRAP 0x01
676#define CFQ_RQ2_WRAP 0x02
677 unsigned wrap = 0;
678
679 if (rq1 == NULL || rq1 == rq2)
680 return rq2;
681 if (rq2 == NULL)
682 return rq1;
683
684 if (rq_is_sync(rq1) != rq_is_sync(rq2))
685 return rq_is_sync(rq1) ? rq1 : rq2;
686
687 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO)
688 return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2;
689
690 s1 = blk_rq_pos(rq1);
691 s2 = blk_rq_pos(rq2);
692
693
694
695
696 back_max = cfqd->cfq_back_max * 2;
697
698
699
700
701
702
703 if (s1 >= last)
704 d1 = s1 - last;
705 else if (s1 + back_max >= last)
706 d1 = (last - s1) * cfqd->cfq_back_penalty;
707 else
708 wrap |= CFQ_RQ1_WRAP;
709
710 if (s2 >= last)
711 d2 = s2 - last;
712 else if (s2 + back_max >= last)
713 d2 = (last - s2) * cfqd->cfq_back_penalty;
714 else
715 wrap |= CFQ_RQ2_WRAP;
716
717
718
719
720
721
722
723 switch (wrap) {
724 case 0:
725 if (d1 < d2)
726 return rq1;
727 else if (d2 < d1)
728 return rq2;
729 else {
730 if (s1 >= s2)
731 return rq1;
732 else
733 return rq2;
734 }
735
736 case CFQ_RQ2_WRAP:
737 return rq1;
738 case CFQ_RQ1_WRAP:
739 return rq2;
740 case (CFQ_RQ1_WRAP|CFQ_RQ2_WRAP):
741 default:
742
743
744
745
746
747
748 if (s1 <= s2)
749 return rq1;
750 else
751 return rq2;
752 }
753}
754
755
756
757
758static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root)
759{
760
761 if (!root->count)
762 return NULL;
763
764 if (!root->left)
765 root->left = rb_first(&root->rb);
766
767 if (root->left)
768 return rb_entry(root->left, struct cfq_queue, rb_node);
769
770 return NULL;
771}
772
773static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root)
774{
775 if (!root->left)
776 root->left = rb_first(&root->rb);
777
778 if (root->left)
779 return rb_entry_cfqg(root->left);
780
781 return NULL;
782}
783
784static void rb_erase_init(struct rb_node *n, struct rb_root *root)
785{
786 rb_erase(n, root);
787 RB_CLEAR_NODE(n);
788}
789
790static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root)
791{
792 if (root->left == n)
793 root->left = NULL;
794 rb_erase_init(n, &root->rb);
795 --root->count;
796}
797
798
799
800
801static struct request *
802cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
803 struct request *last)
804{
805 struct rb_node *rbnext = rb_next(&last->rb_node);
806 struct rb_node *rbprev = rb_prev(&last->rb_node);
807 struct request *next = NULL, *prev = NULL;
808
809 BUG_ON(RB_EMPTY_NODE(&last->rb_node));
810
811 if (rbprev)
812 prev = rb_entry_rq(rbprev);
813
814 if (rbnext)
815 next = rb_entry_rq(rbnext);
816 else {
817 rbnext = rb_first(&cfqq->sort_list);
818 if (rbnext && rbnext != &last->rb_node)
819 next = rb_entry_rq(rbnext);
820 }
821
822 return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
823}
824
825static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
826 struct cfq_queue *cfqq)
827{
828
829
830
831 return (cfqq->cfqg->nr_cfqq - 1) * (cfq_prio_slice(cfqd, 1, 0) -
832 cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio));
833}
834
835static inline s64
836cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg)
837{
838 return cfqg->vdisktime - st->min_vdisktime;
839}
840
841static void
842__cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
843{
844 struct rb_node **node = &st->rb.rb_node;
845 struct rb_node *parent = NULL;
846 struct cfq_group *__cfqg;
847 s64 key = cfqg_key(st, cfqg);
848 int left = 1;
849
850 while (*node != NULL) {
851 parent = *node;
852 __cfqg = rb_entry_cfqg(parent);
853
854 if (key < cfqg_key(st, __cfqg))
855 node = &parent->rb_left;
856 else {
857 node = &parent->rb_right;
858 left = 0;
859 }
860 }
861
862 if (left)
863 st->left = &cfqg->rb_node;
864
865 rb_link_node(&cfqg->rb_node, parent, node);
866 rb_insert_color(&cfqg->rb_node, &st->rb);
867}
868
869static void
870cfq_update_group_weight(struct cfq_group *cfqg)
871{
872 BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
873 if (cfqg->needs_update) {
874 cfqg->weight = cfqg->new_weight;
875 cfqg->needs_update = false;
876 }
877}
878
879static void
880cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg)
881{
882 BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node));
883
884 cfq_update_group_weight(cfqg);
885 __cfq_group_service_tree_add(st, cfqg);
886 st->total_weight += cfqg->weight;
887}
888
889static void
890cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
891{
892 struct cfq_rb_root *st = &cfqd->grp_service_tree;
893 struct cfq_group *__cfqg;
894 struct rb_node *n;
895
896 cfqg->nr_cfqq++;
897 if (!RB_EMPTY_NODE(&cfqg->rb_node))
898 return;
899
900
901
902
903
904
905 n = rb_last(&st->rb);
906 if (n) {
907 __cfqg = rb_entry_cfqg(n);
908 cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY;
909 } else
910 cfqg->vdisktime = st->min_vdisktime;
911 cfq_group_service_tree_add(st, cfqg);
912}
913
914static void
915cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg)
916{
917 st->total_weight -= cfqg->weight;
918 if (!RB_EMPTY_NODE(&cfqg->rb_node))
919 cfq_rb_erase(&cfqg->rb_node, st);
920}
921
922static void
923cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
924{
925 struct cfq_rb_root *st = &cfqd->grp_service_tree;
926
927 BUG_ON(cfqg->nr_cfqq < 1);
928 cfqg->nr_cfqq--;
929
930
931 if (cfqg->nr_cfqq)
932 return;
933
934 cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
935 cfq_group_service_tree_del(st, cfqg);
936 cfqg->saved_workload_slice = 0;
937 cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
938}
939
940static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
941 unsigned int *unaccounted_time)
942{
943 unsigned int slice_used;
944
945
946
947
948
949 if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
950
951
952
953
954
955
956 slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
957 1);
958 } else {
959 slice_used = jiffies - cfqq->slice_start;
960 if (slice_used > cfqq->allocated_slice) {
961 *unaccounted_time = slice_used - cfqq->allocated_slice;
962 slice_used = cfqq->allocated_slice;
963 }
964 if (time_after(cfqq->slice_start, cfqq->dispatch_start))
965 *unaccounted_time += cfqq->slice_start -
966 cfqq->dispatch_start;
967 }
968
969 return slice_used;
970}
971
972static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
973 struct cfq_queue *cfqq)
974{
975 struct cfq_rb_root *st = &cfqd->grp_service_tree;
976 unsigned int used_sl, charge, unaccounted_sl = 0;
977 int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
978 - cfqg->service_tree_idle.count;
979
980 BUG_ON(nr_sync < 0);
981 used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
982
983 if (iops_mode(cfqd))
984 charge = cfqq->slice_dispatch;
985 else if (!cfq_cfqq_sync(cfqq) && !nr_sync)
986 charge = cfqq->allocated_slice;
987
988
989 cfq_group_service_tree_del(st, cfqg);
990 cfqg->vdisktime += cfq_scale_slice(charge, cfqg);
991
992 cfq_group_service_tree_add(st, cfqg);
993
994
995 if (time_after(cfqd->workload_expires, jiffies)) {
996 cfqg->saved_workload_slice = cfqd->workload_expires
997 - jiffies;
998 cfqg->saved_workload = cfqd->serving_type;
999 cfqg->saved_serving_prio = cfqd->serving_prio;
1000 } else
1001 cfqg->saved_workload_slice = 0;
1002
1003 cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
1004 st->min_vdisktime);
1005 cfq_log_cfqq(cfqq->cfqd, cfqq,
1006 "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
1007 used_sl, cfqq->slice_dispatch, charge,
1008 iops_mode(cfqd), cfqq->nr_sectors);
1009 cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl,
1010 unaccounted_sl);
1011 cfq_blkiocg_set_start_empty_time(&cfqg->blkg);
1012}
1013
1014#ifdef CONFIG_CFQ_GROUP_IOSCHED
1015static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
1016{
1017 if (blkg)
1018 return container_of(blkg, struct cfq_group, blkg);
1019 return NULL;
1020}
1021
1022static void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg,
1023 unsigned int weight)
1024{
1025 struct cfq_group *cfqg = cfqg_of_blkg(blkg);
1026 cfqg->new_weight = weight;
1027 cfqg->needs_update = true;
1028}
1029
1030static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
1031 struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
1032{
1033 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
1034 unsigned int major, minor;
1035
1036
1037
1038
1039
1040
1041
1042 if (bdi->dev) {
1043 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1044 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
1045 (void *)cfqd, MKDEV(major, minor));
1046 } else
1047 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
1048 (void *)cfqd, 0);
1049
1050 cfqd->nr_blkcg_linked_grps++;
1051 cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
1052
1053
1054 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
1055}
1056
1057
1058
1059
1060
1061
1062static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
1063{
1064 struct cfq_group *cfqg = NULL;
1065 int i, j, ret;
1066 struct cfq_rb_root *st;
1067
1068 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
1069 if (!cfqg)
1070 return NULL;
1071
1072 for_each_cfqg_st(cfqg, i, j, st)
1073 *st = CFQ_RB_ROOT;
1074 RB_CLEAR_NODE(&cfqg->rb_node);
1075
1076 cfqg->ttime.last_end_request = jiffies;
1077
1078
1079
1080
1081
1082
1083
1084 cfqg->ref = 1;
1085
1086 ret = blkio_alloc_blkg_stats(&cfqg->blkg);
1087 if (ret) {
1088 kfree(cfqg);
1089 return NULL;
1090 }
1091
1092 return cfqg;
1093}
1094
1095static struct cfq_group *
1096cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
1097{
1098 struct cfq_group *cfqg = NULL;
1099 void *key = cfqd;
1100 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
1101 unsigned int major, minor;
1102
1103
1104
1105
1106
1107 if (blkcg == &blkio_root_cgroup)
1108 cfqg = &cfqd->root_group;
1109 else
1110 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
1111
1112 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
1113 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1114 cfqg->blkg.dev = MKDEV(major, minor);
1115 }
1116
1117 return cfqg;
1118}
1119
1120
1121
1122
1123
1124static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
1125{
1126 struct blkio_cgroup *blkcg;
1127 struct cfq_group *cfqg = NULL, *__cfqg = NULL;
1128 struct request_queue *q = cfqd->queue;
1129
1130 rcu_read_lock();
1131 blkcg = task_blkio_cgroup(current);
1132 cfqg = cfq_find_cfqg(cfqd, blkcg);
1133 if (cfqg) {
1134 rcu_read_unlock();
1135 return cfqg;
1136 }
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148 rcu_read_unlock();
1149 spin_unlock_irq(q->queue_lock);
1150
1151 cfqg = cfq_alloc_cfqg(cfqd);
1152
1153 spin_lock_irq(q->queue_lock);
1154
1155 rcu_read_lock();
1156 blkcg = task_blkio_cgroup(current);
1157
1158
1159
1160
1161
1162 __cfqg = cfq_find_cfqg(cfqd, blkcg);
1163
1164 if (__cfqg) {
1165 kfree(cfqg);
1166 rcu_read_unlock();
1167 return __cfqg;
1168 }
1169
1170 if (!cfqg)
1171 cfqg = &cfqd->root_group;
1172
1173 cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
1174 rcu_read_unlock();
1175 return cfqg;
1176}
1177
1178static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1179{
1180 cfqg->ref++;
1181 return cfqg;
1182}
1183
1184static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
1185{
1186
1187 if (!cfq_cfqq_sync(cfqq))
1188 cfqg = &cfqq->cfqd->root_group;
1189
1190 cfqq->cfqg = cfqg;
1191
1192 cfqq->cfqg->ref++;
1193}
1194
1195static void cfq_put_cfqg(struct cfq_group *cfqg)
1196{
1197 struct cfq_rb_root *st;
1198 int i, j;
1199
1200 BUG_ON(cfqg->ref <= 0);
1201 cfqg->ref--;
1202 if (cfqg->ref)
1203 return;
1204 for_each_cfqg_st(cfqg, i, j, st)
1205 BUG_ON(!RB_EMPTY_ROOT(&st->rb));
1206 free_percpu(cfqg->blkg.stats_cpu);
1207 kfree(cfqg);
1208}
1209
1210static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1211{
1212
1213 BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
1214
1215 hlist_del_init(&cfqg->cfqd_node);
1216
1217 BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
1218 cfqd->nr_blkcg_linked_grps--;
1219
1220
1221
1222
1223
1224 cfq_put_cfqg(cfqg);
1225}
1226
1227static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1228{
1229 struct hlist_node *pos, *n;
1230 struct cfq_group *cfqg;
1231
1232 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
1233
1234
1235
1236
1237
1238 if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
1239 cfq_destroy_cfqg(cfqd, cfqg);
1240 }
1241}
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1258{
1259 unsigned long flags;
1260 struct cfq_data *cfqd = key;
1261
1262 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
1263 cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
1264 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1265}
1266
1267#else
1268static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
1269{
1270 return &cfqd->root_group;
1271}
1272
1273static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
1274{
1275 return cfqg;
1276}
1277
1278static inline void
1279cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
1280 cfqq->cfqg = cfqg;
1281}
1282
1283static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
1284static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
1285
1286#endif
1287
1288
1289
1290
1291
1292
1293static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1294 bool add_front)
1295{
1296 struct rb_node **p, *parent;
1297 struct cfq_queue *__cfqq;
1298 unsigned long rb_key;
1299 struct cfq_rb_root *service_tree;
1300 int left;
1301 int new_cfqq = 1;
1302
1303 service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
1304 cfqq_type(cfqq));
1305 if (cfq_class_idle(cfqq)) {
1306 rb_key = CFQ_IDLE_DELAY;
1307 parent = rb_last(&service_tree->rb);
1308 if (parent && parent != &cfqq->rb_node) {
1309 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
1310 rb_key += __cfqq->rb_key;
1311 } else
1312 rb_key += jiffies;
1313 } else if (!add_front) {
1314
1315
1316
1317
1318
1319
1320 rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
1321 rb_key -= cfqq->slice_resid;
1322 cfqq->slice_resid = 0;
1323 } else {
1324 rb_key = -HZ;
1325 __cfqq = cfq_rb_first(service_tree);
1326 rb_key += __cfqq ? __cfqq->rb_key : jiffies;
1327 }
1328
1329 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
1330 new_cfqq = 0;
1331
1332
1333
1334 if (rb_key == cfqq->rb_key &&
1335 cfqq->service_tree == service_tree)
1336 return;
1337
1338 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
1339 cfqq->service_tree = NULL;
1340 }
1341
1342 left = 1;
1343 parent = NULL;
1344 cfqq->service_tree = service_tree;
1345 p = &service_tree->rb.rb_node;
1346 while (*p) {
1347 struct rb_node **n;
1348
1349 parent = *p;
1350 __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
1351
1352
1353
1354
1355 if (time_before(rb_key, __cfqq->rb_key))
1356 n = &(*p)->rb_left;
1357 else {
1358 n = &(*p)->rb_right;
1359 left = 0;
1360 }
1361
1362 p = n;
1363 }
1364
1365 if (left)
1366 service_tree->left = &cfqq->rb_node;
1367
1368 cfqq->rb_key = rb_key;
1369 rb_link_node(&cfqq->rb_node, parent, p);
1370 rb_insert_color(&cfqq->rb_node, &service_tree->rb);
1371 service_tree->count++;
1372 if (add_front || !new_cfqq)
1373 return;
1374 cfq_group_notify_queue_add(cfqd, cfqq->cfqg);
1375}
1376
1377static struct cfq_queue *
1378cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root,
1379 sector_t sector, struct rb_node **ret_parent,
1380 struct rb_node ***rb_link)
1381{
1382 struct rb_node **p, *parent;
1383 struct cfq_queue *cfqq = NULL;
1384
1385 parent = NULL;
1386 p = &root->rb_node;
1387 while (*p) {
1388 struct rb_node **n;
1389
1390 parent = *p;
1391 cfqq = rb_entry(parent, struct cfq_queue, p_node);
1392
1393
1394
1395
1396
1397 if (sector > blk_rq_pos(cfqq->next_rq))
1398 n = &(*p)->rb_right;
1399 else if (sector < blk_rq_pos(cfqq->next_rq))
1400 n = &(*p)->rb_left;
1401 else
1402 break;
1403 p = n;
1404 cfqq = NULL;
1405 }
1406
1407 *ret_parent = parent;
1408 if (rb_link)
1409 *rb_link = p;
1410 return cfqq;
1411}
1412
1413static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1414{
1415 struct rb_node **p, *parent;
1416 struct cfq_queue *__cfqq;
1417
1418 if (cfqq->p_root) {
1419 rb_erase(&cfqq->p_node, cfqq->p_root);
1420 cfqq->p_root = NULL;
1421 }
1422
1423 if (cfq_class_idle(cfqq))
1424 return;
1425 if (!cfqq->next_rq)
1426 return;
1427
1428 cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio];
1429 __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root,
1430 blk_rq_pos(cfqq->next_rq), &parent, &p);
1431 if (!__cfqq) {
1432 rb_link_node(&cfqq->p_node, parent, p);
1433 rb_insert_color(&cfqq->p_node, cfqq->p_root);
1434 } else
1435 cfqq->p_root = NULL;
1436}
1437
1438
1439
1440
1441static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1442{
1443
1444
1445
1446 if (cfq_cfqq_on_rr(cfqq)) {
1447 cfq_service_tree_add(cfqd, cfqq, 0);
1448 cfq_prio_tree_add(cfqd, cfqq);
1449 }
1450}
1451
1452
1453
1454
1455
1456static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1457{
1458 cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
1459 BUG_ON(cfq_cfqq_on_rr(cfqq));
1460 cfq_mark_cfqq_on_rr(cfqq);
1461 cfqd->busy_queues++;
1462 if (cfq_cfqq_sync(cfqq))
1463 cfqd->busy_sync_queues++;
1464
1465 cfq_resort_rr_list(cfqd, cfqq);
1466}
1467
1468
1469
1470
1471
1472static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1473{
1474 cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
1475 BUG_ON(!cfq_cfqq_on_rr(cfqq));
1476 cfq_clear_cfqq_on_rr(cfqq);
1477
1478 if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
1479 cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree);
1480 cfqq->service_tree = NULL;
1481 }
1482 if (cfqq->p_root) {
1483 rb_erase(&cfqq->p_node, cfqq->p_root);
1484 cfqq->p_root = NULL;
1485 }
1486
1487 cfq_group_notify_queue_del(cfqd, cfqq->cfqg);
1488 BUG_ON(!cfqd->busy_queues);
1489 cfqd->busy_queues--;
1490 if (cfq_cfqq_sync(cfqq))
1491 cfqd->busy_sync_queues--;
1492}
1493
1494
1495
1496
1497static void cfq_del_rq_rb(struct request *rq)
1498{
1499 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1500 const int sync = rq_is_sync(rq);
1501
1502 BUG_ON(!cfqq->queued[sync]);
1503 cfqq->queued[sync]--;
1504
1505 elv_rb_del(&cfqq->sort_list, rq);
1506
1507 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) {
1508
1509
1510
1511
1512
1513 if (cfqq->p_root) {
1514 rb_erase(&cfqq->p_node, cfqq->p_root);
1515 cfqq->p_root = NULL;
1516 }
1517 }
1518}
1519
1520static void cfq_add_rq_rb(struct request *rq)
1521{
1522 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1523 struct cfq_data *cfqd = cfqq->cfqd;
1524 struct request *prev;
1525
1526 cfqq->queued[rq_is_sync(rq)]++;
1527
1528 elv_rb_add(&cfqq->sort_list, rq);
1529
1530 if (!cfq_cfqq_on_rr(cfqq))
1531 cfq_add_cfqq_rr(cfqd, cfqq);
1532
1533
1534
1535
1536 prev = cfqq->next_rq;
1537 cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq, cfqd->last_position);
1538
1539
1540
1541
1542 if (prev != cfqq->next_rq)
1543 cfq_prio_tree_add(cfqd, cfqq);
1544
1545 BUG_ON(!cfqq->next_rq);
1546}
1547
1548static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
1549{
1550 elv_rb_del(&cfqq->sort_list, rq);
1551 cfqq->queued[rq_is_sync(rq)]--;
1552 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1553 rq_data_dir(rq), rq_is_sync(rq));
1554 cfq_add_rq_rb(rq);
1555 cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
1556 &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq),
1557 rq_is_sync(rq));
1558}
1559
1560static struct request *
1561cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
1562{
1563 struct task_struct *tsk = current;
1564 struct cfq_io_context *cic;
1565 struct cfq_queue *cfqq;
1566
1567 cic = cfq_cic_lookup(cfqd, tsk->io_context);
1568 if (!cic)
1569 return NULL;
1570
1571 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1572 if (cfqq) {
1573 sector_t sector = bio->bi_sector + bio_sectors(bio);
1574
1575 return elv_rb_find(&cfqq->sort_list, sector);
1576 }
1577
1578 return NULL;
1579}
1580
1581static void cfq_activate_request(struct request_queue *q, struct request *rq)
1582{
1583 struct cfq_data *cfqd = q->elevator->elevator_data;
1584
1585 cfqd->rq_in_driver++;
1586 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
1587 cfqd->rq_in_driver);
1588
1589 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
1590}
1591
1592static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
1593{
1594 struct cfq_data *cfqd = q->elevator->elevator_data;
1595
1596 WARN_ON(!cfqd->rq_in_driver);
1597 cfqd->rq_in_driver--;
1598 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
1599 cfqd->rq_in_driver);
1600}
1601
1602static void cfq_remove_request(struct request *rq)
1603{
1604 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1605
1606 if (cfqq->next_rq == rq)
1607 cfqq->next_rq = cfq_find_next_rq(cfqq->cfqd, cfqq, rq);
1608
1609 list_del_init(&rq->queuelist);
1610 cfq_del_rq_rb(rq);
1611
1612 cfqq->cfqd->rq_queued--;
1613 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1614 rq_data_dir(rq), rq_is_sync(rq));
1615 if (rq->cmd_flags & REQ_PRIO) {
1616 WARN_ON(!cfqq->prio_pending);
1617 cfqq->prio_pending--;
1618 }
1619}
1620
1621static int cfq_merge(struct request_queue *q, struct request **req,
1622 struct bio *bio)
1623{
1624 struct cfq_data *cfqd = q->elevator->elevator_data;
1625 struct request *__rq;
1626
1627 __rq = cfq_find_rq_fmerge(cfqd, bio);
1628 if (__rq && elv_rq_merge_ok(__rq, bio)) {
1629 *req = __rq;
1630 return ELEVATOR_FRONT_MERGE;
1631 }
1632
1633 return ELEVATOR_NO_MERGE;
1634}
1635
1636static void cfq_merged_request(struct request_queue *q, struct request *req,
1637 int type)
1638{
1639 if (type == ELEVATOR_FRONT_MERGE) {
1640 struct cfq_queue *cfqq = RQ_CFQQ(req);
1641
1642 cfq_reposition_rq_rb(cfqq, req);
1643 }
1644}
1645
1646static void cfq_bio_merged(struct request_queue *q, struct request *req,
1647 struct bio *bio)
1648{
1649 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg,
1650 bio_data_dir(bio), cfq_bio_sync(bio));
1651}
1652
1653static void
1654cfq_merged_requests(struct request_queue *q, struct request *rq,
1655 struct request *next)
1656{
1657 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1658 struct cfq_data *cfqd = q->elevator->elevator_data;
1659
1660
1661
1662
1663 if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
1664 time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
1665 list_move(&rq->queuelist, &next->queuelist);
1666 rq_set_fifo_time(rq, rq_fifo_time(next));
1667 }
1668
1669 if (cfqq->next_rq == next)
1670 cfqq->next_rq = rq;
1671 cfq_remove_request(next);
1672 cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg,
1673 rq_data_dir(next), rq_is_sync(next));
1674
1675 cfqq = RQ_CFQQ(next);
1676
1677
1678
1679
1680
1681 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) &&
1682 cfqq != cfqd->active_queue)
1683 cfq_del_cfqq_rr(cfqd, cfqq);
1684}
1685
1686static int cfq_allow_merge(struct request_queue *q, struct request *rq,
1687 struct bio *bio)
1688{
1689 struct cfq_data *cfqd = q->elevator->elevator_data;
1690 struct cfq_io_context *cic;
1691 struct cfq_queue *cfqq;
1692
1693
1694
1695
1696 if (cfq_bio_sync(bio) && !rq_is_sync(rq))
1697 return false;
1698
1699
1700
1701
1702
1703 cic = cfq_cic_lookup(cfqd, current->io_context);
1704 if (!cic)
1705 return false;
1706
1707 cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
1708 return cfqq == RQ_CFQQ(rq);
1709}
1710
1711static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1712{
1713 del_timer(&cfqd->idle_slice_timer);
1714 cfq_blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
1715}
1716
1717static void __cfq_set_active_queue(struct cfq_data *cfqd,
1718 struct cfq_queue *cfqq)
1719{
1720 if (cfqq) {
1721 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
1722 cfqd->serving_prio, cfqd->serving_type);
1723 cfq_blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
1724 cfqq->slice_start = 0;
1725 cfqq->dispatch_start = jiffies;
1726 cfqq->allocated_slice = 0;
1727 cfqq->slice_end = 0;
1728 cfqq->slice_dispatch = 0;
1729 cfqq->nr_sectors = 0;
1730
1731 cfq_clear_cfqq_wait_request(cfqq);
1732 cfq_clear_cfqq_must_dispatch(cfqq);
1733 cfq_clear_cfqq_must_alloc_slice(cfqq);
1734 cfq_clear_cfqq_fifo_expire(cfqq);
1735 cfq_mark_cfqq_slice_new(cfqq);
1736
1737 cfq_del_timer(cfqd, cfqq);
1738 }
1739
1740 cfqd->active_queue = cfqq;
1741}
1742
1743
1744
1745
1746static void
1747__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1748 bool timed_out)
1749{
1750 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
1751
1752 if (cfq_cfqq_wait_request(cfqq))
1753 cfq_del_timer(cfqd, cfqq);
1754
1755 cfq_clear_cfqq_wait_request(cfqq);
1756 cfq_clear_cfqq_wait_busy(cfqq);
1757
1758
1759
1760
1761
1762
1763
1764 if (cfq_cfqq_coop(cfqq) && CFQQ_SEEKY(cfqq))
1765 cfq_mark_cfqq_split_coop(cfqq);
1766
1767
1768
1769
1770 if (timed_out) {
1771 if (cfq_cfqq_slice_new(cfqq))
1772 cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
1773 else
1774 cfqq->slice_resid = cfqq->slice_end - jiffies;
1775 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
1776 }
1777
1778 cfq_group_served(cfqd, cfqq->cfqg, cfqq);
1779
1780 if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
1781 cfq_del_cfqq_rr(cfqd, cfqq);
1782
1783 cfq_resort_rr_list(cfqd, cfqq);
1784
1785 if (cfqq == cfqd->active_queue)
1786 cfqd->active_queue = NULL;
1787
1788 if (cfqd->active_cic) {
1789 put_io_context(cfqd->active_cic->ioc);
1790 cfqd->active_cic = NULL;
1791 }
1792}
1793
1794static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
1795{
1796 struct cfq_queue *cfqq = cfqd->active_queue;
1797
1798 if (cfqq)
1799 __cfq_slice_expired(cfqd, cfqq, timed_out);
1800}
1801
1802
1803
1804
1805
1806static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
1807{
1808 struct cfq_rb_root *service_tree =
1809 service_tree_for(cfqd->serving_group, cfqd->serving_prio,
1810 cfqd->serving_type);
1811
1812 if (!cfqd->rq_queued)
1813 return NULL;
1814
1815
1816 if (!service_tree)
1817 return NULL;
1818 if (RB_EMPTY_ROOT(&service_tree->rb))
1819 return NULL;
1820 return cfq_rb_first(service_tree);
1821}
1822
1823static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
1824{
1825 struct cfq_group *cfqg;
1826 struct cfq_queue *cfqq;
1827 int i, j;
1828 struct cfq_rb_root *st;
1829
1830 if (!cfqd->rq_queued)
1831 return NULL;
1832
1833 cfqg = cfq_get_next_cfqg(cfqd);
1834 if (!cfqg)
1835 return NULL;
1836
1837 for_each_cfqg_st(cfqg, i, j, st)
1838 if ((cfqq = cfq_rb_first(st)) != NULL)
1839 return cfqq;
1840 return NULL;
1841}
1842
1843
1844
1845
1846static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd,
1847 struct cfq_queue *cfqq)
1848{
1849 if (!cfqq)
1850 cfqq = cfq_get_next_queue(cfqd);
1851
1852 __cfq_set_active_queue(cfqd, cfqq);
1853 return cfqq;
1854}
1855
1856static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
1857 struct request *rq)
1858{
1859 if (blk_rq_pos(rq) >= cfqd->last_position)
1860 return blk_rq_pos(rq) - cfqd->last_position;
1861 else
1862 return cfqd->last_position - blk_rq_pos(rq);
1863}
1864
1865static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1866 struct request *rq)
1867{
1868 return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
1869}
1870
1871static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
1872 struct cfq_queue *cur_cfqq)
1873{
1874 struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio];
1875 struct rb_node *parent, *node;
1876 struct cfq_queue *__cfqq;
1877 sector_t sector = cfqd->last_position;
1878
1879 if (RB_EMPTY_ROOT(root))
1880 return NULL;
1881
1882
1883
1884
1885
1886 __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL);
1887 if (__cfqq)
1888 return __cfqq;
1889
1890
1891
1892
1893
1894 __cfqq = rb_entry(parent, struct cfq_queue, p_node);
1895 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1896 return __cfqq;
1897
1898 if (blk_rq_pos(__cfqq->next_rq) < sector)
1899 node = rb_next(&__cfqq->p_node);
1900 else
1901 node = rb_prev(&__cfqq->p_node);
1902 if (!node)
1903 return NULL;
1904
1905 __cfqq = rb_entry(node, struct cfq_queue, p_node);
1906 if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
1907 return __cfqq;
1908
1909 return NULL;
1910}
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
1923 struct cfq_queue *cur_cfqq)
1924{
1925 struct cfq_queue *cfqq;
1926
1927 if (cfq_class_idle(cur_cfqq))
1928 return NULL;
1929 if (!cfq_cfqq_sync(cur_cfqq))
1930 return NULL;
1931 if (CFQQ_SEEKY(cur_cfqq))
1932 return NULL;
1933
1934
1935
1936
1937 if (cur_cfqq->cfqg->nr_cfqq == 1)
1938 return NULL;
1939
1940
1941
1942
1943
1944
1945 cfqq = cfqq_close(cfqd, cur_cfqq);
1946 if (!cfqq)
1947 return NULL;
1948
1949
1950 if (cur_cfqq->cfqg != cfqq->cfqg)
1951 return NULL;
1952
1953
1954
1955
1956 if (!cfq_cfqq_sync(cfqq))
1957 return NULL;
1958 if (CFQQ_SEEKY(cfqq))
1959 return NULL;
1960
1961
1962
1963
1964 if (cfq_class_rt(cfqq) != cfq_class_rt(cur_cfqq))
1965 return NULL;
1966
1967 return cfqq;
1968}
1969
1970
1971
1972
1973
1974static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1975{
1976 enum wl_prio_t prio = cfqq_prio(cfqq);
1977 struct cfq_rb_root *service_tree = cfqq->service_tree;
1978
1979 BUG_ON(!service_tree);
1980 BUG_ON(!service_tree->count);
1981
1982 if (!cfqd->cfq_slice_idle)
1983 return false;
1984
1985
1986 if (prio == IDLE_WORKLOAD)
1987 return false;
1988
1989
1990 if (cfq_cfqq_idle_window(cfqq) &&
1991 !(blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag))
1992 return true;
1993
1994
1995
1996
1997
1998 if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) &&
1999 !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false))
2000 return true;
2001 cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
2002 service_tree->count);
2003 return false;
2004}
2005
2006static void cfq_arm_slice_timer(struct cfq_data *cfqd)
2007{
2008 struct cfq_queue *cfqq = cfqd->active_queue;
2009 struct cfq_io_context *cic;
2010 unsigned long sl, group_idle = 0;
2011
2012
2013
2014
2015
2016
2017 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
2018 return;
2019
2020 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
2021 WARN_ON(cfq_cfqq_slice_new(cfqq));
2022
2023
2024
2025
2026 if (!cfq_should_idle(cfqd, cfqq)) {
2027
2028 if (cfqd->cfq_group_idle)
2029 group_idle = cfqd->cfq_group_idle;
2030 else
2031 return;
2032 }
2033
2034
2035
2036
2037 if (cfqq->dispatched)
2038 return;
2039
2040
2041
2042
2043 cic = cfqd->active_cic;
2044 if (!cic || !atomic_read(&cic->ioc->nr_tasks))
2045 return;
2046
2047
2048
2049
2050
2051
2052 if (sample_valid(cic->ttime.ttime_samples) &&
2053 (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
2054 cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
2055 cic->ttime.ttime_mean);
2056 return;
2057 }
2058
2059
2060 if (group_idle && cfqq->cfqg->nr_cfqq > 1)
2061 return;
2062
2063 cfq_mark_cfqq_wait_request(cfqq);
2064
2065 if (group_idle)
2066 sl = cfqd->cfq_group_idle;
2067 else
2068 sl = cfqd->cfq_slice_idle;
2069
2070 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
2071 cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
2072 cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
2073 group_idle ? 1 : 0);
2074}
2075
2076
2077
2078
2079static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
2080{
2081 struct cfq_data *cfqd = q->elevator->elevator_data;
2082 struct cfq_queue *cfqq = RQ_CFQQ(rq);
2083
2084 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
2085
2086 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
2087 cfq_remove_request(rq);
2088 cfqq->dispatched++;
2089 (RQ_CFQG(rq))->dispatched++;
2090 elv_dispatch_sort(q, rq);
2091
2092 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
2093 cfqq->nr_sectors += blk_rq_sectors(rq);
2094 cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
2095 rq_data_dir(rq), rq_is_sync(rq));
2096}
2097
2098
2099
2100
2101static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
2102{
2103 struct request *rq = NULL;
2104
2105 if (cfq_cfqq_fifo_expire(cfqq))
2106 return NULL;
2107
2108 cfq_mark_cfqq_fifo_expire(cfqq);
2109
2110 if (list_empty(&cfqq->fifo))
2111 return NULL;
2112
2113 rq = rq_entry_fifo(cfqq->fifo.next);
2114 if (time_before(jiffies, rq_fifo_time(rq)))
2115 rq = NULL;
2116
2117 cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
2118 return rq;
2119}
2120
2121static inline int
2122cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2123{
2124 const int base_rq = cfqd->cfq_slice_async_rq;
2125
2126 WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
2127
2128 return 2 * base_rq * (IOPRIO_BE_NR - cfqq->ioprio);
2129}
2130
2131
2132
2133
2134static int cfqq_process_refs(struct cfq_queue *cfqq)
2135{
2136 int process_refs, io_refs;
2137
2138 io_refs = cfqq->allocated[READ] + cfqq->allocated[WRITE];
2139 process_refs = cfqq->ref - io_refs;
2140 BUG_ON(process_refs < 0);
2141 return process_refs;
2142}
2143
2144static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq)
2145{
2146 int process_refs, new_process_refs;
2147 struct cfq_queue *__cfqq;
2148
2149
2150
2151
2152
2153
2154
2155 if (!cfqq_process_refs(new_cfqq))
2156 return;
2157
2158
2159 while ((__cfqq = new_cfqq->new_cfqq)) {
2160 if (__cfqq == cfqq)
2161 return;
2162 new_cfqq = __cfqq;
2163 }
2164
2165 process_refs = cfqq_process_refs(cfqq);
2166 new_process_refs = cfqq_process_refs(new_cfqq);
2167
2168
2169
2170
2171 if (process_refs == 0 || new_process_refs == 0)
2172 return;
2173
2174
2175
2176
2177 if (new_process_refs >= process_refs) {
2178 cfqq->new_cfqq = new_cfqq;
2179 new_cfqq->ref += process_refs;
2180 } else {
2181 new_cfqq->new_cfqq = cfqq;
2182 cfqq->ref += new_process_refs;
2183 }
2184}
2185
2186static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd,
2187 struct cfq_group *cfqg, enum wl_prio_t prio)
2188{
2189 struct cfq_queue *queue;
2190 int i;
2191 bool key_valid = false;
2192 unsigned long lowest_key = 0;
2193 enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
2194
2195 for (i = 0; i <= SYNC_WORKLOAD; ++i) {
2196
2197 queue = cfq_rb_first(service_tree_for(cfqg, prio, i));
2198 if (queue &&
2199 (!key_valid || time_before(queue->rb_key, lowest_key))) {
2200 lowest_key = queue->rb_key;
2201 cur_best = i;
2202 key_valid = true;
2203 }
2204 }
2205
2206 return cur_best;
2207}
2208
2209static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
2210{
2211 unsigned slice;
2212 unsigned count;
2213 struct cfq_rb_root *st;
2214 unsigned group_slice;
2215 enum wl_prio_t original_prio = cfqd->serving_prio;
2216
2217
2218 if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
2219 cfqd->serving_prio = RT_WORKLOAD;
2220 else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
2221 cfqd->serving_prio = BE_WORKLOAD;
2222 else {
2223 cfqd->serving_prio = IDLE_WORKLOAD;
2224 cfqd->workload_expires = jiffies + 1;
2225 return;
2226 }
2227
2228 if (original_prio != cfqd->serving_prio)
2229 goto new_workload;
2230
2231
2232
2233
2234
2235
2236 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
2237 count = st->count;
2238
2239
2240
2241
2242 if (count && !time_after(jiffies, cfqd->workload_expires))
2243 return;
2244
2245new_workload:
2246
2247 cfqd->serving_type =
2248 cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio);
2249 st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type);
2250 count = st->count;
2251
2252
2253
2254
2255
2256
2257 group_slice = cfq_group_slice(cfqd, cfqg);
2258
2259 slice = group_slice * count /
2260 max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
2261 cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
2262
2263 if (cfqd->serving_type == ASYNC_WORKLOAD) {
2264 unsigned int tmp;
2265
2266
2267
2268
2269
2270
2271
2272
2273 tmp = cfq_target_latency * cfqg_busy_async_queues(cfqd, cfqg);
2274 tmp = tmp/cfqd->busy_queues;
2275 slice = min_t(unsigned, slice, tmp);
2276
2277
2278
2279 slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
2280 } else
2281
2282 slice = max(slice, 2 * cfqd->cfq_slice_idle);
2283
2284 slice = max_t(unsigned, slice, CFQ_MIN_TT);
2285 cfq_log(cfqd, "workload slice:%d", slice);
2286 cfqd->workload_expires = jiffies + slice;
2287}
2288
2289static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
2290{
2291 struct cfq_rb_root *st = &cfqd->grp_service_tree;
2292 struct cfq_group *cfqg;
2293
2294 if (RB_EMPTY_ROOT(&st->rb))
2295 return NULL;
2296 cfqg = cfq_rb_first_group(st);
2297 update_min_vdisktime(st);
2298 return cfqg;
2299}
2300
2301static void cfq_choose_cfqg(struct cfq_data *cfqd)
2302{
2303 struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
2304
2305 cfqd->serving_group = cfqg;
2306
2307
2308 if (cfqg->saved_workload_slice) {
2309 cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
2310 cfqd->serving_type = cfqg->saved_workload;
2311 cfqd->serving_prio = cfqg->saved_serving_prio;
2312 } else
2313 cfqd->workload_expires = jiffies - 1;
2314
2315 choose_service_tree(cfqd, cfqg);
2316}
2317
2318
2319
2320
2321
2322static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
2323{
2324 struct cfq_queue *cfqq, *new_cfqq = NULL;
2325
2326 cfqq = cfqd->active_queue;
2327 if (!cfqq)
2328 goto new_queue;
2329
2330 if (!cfqd->rq_queued)
2331 return NULL;
2332
2333
2334
2335
2336 if (cfq_cfqq_wait_busy(cfqq) && !RB_EMPTY_ROOT(&cfqq->sort_list))
2337 goto expire;
2338
2339
2340
2341
2342 if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) {
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352 if (cfqq->cfqg->nr_cfqq == 1 && RB_EMPTY_ROOT(&cfqq->sort_list)
2353 && cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2354 cfqq = NULL;
2355 goto keep_queue;
2356 } else
2357 goto check_group_idle;
2358 }
2359
2360
2361
2362
2363
2364 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
2365 goto keep_queue;
2366
2367
2368
2369
2370
2371
2372
2373 new_cfqq = cfq_close_cooperator(cfqd, cfqq);
2374 if (new_cfqq) {
2375 if (!cfqq->new_cfqq)
2376 cfq_setup_merge(cfqq, new_cfqq);
2377 goto expire;
2378 }
2379
2380
2381
2382
2383
2384
2385 if (timer_pending(&cfqd->idle_slice_timer)) {
2386 cfqq = NULL;
2387 goto keep_queue;
2388 }
2389
2390
2391
2392
2393
2394 if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
2395 (cfq_cfqq_slice_new(cfqq) ||
2396 (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
2397 cfq_clear_cfqq_deep(cfqq);
2398 cfq_clear_cfqq_idle_window(cfqq);
2399 }
2400
2401 if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) {
2402 cfqq = NULL;
2403 goto keep_queue;
2404 }
2405
2406
2407
2408
2409
2410check_group_idle:
2411 if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 &&
2412 cfqq->cfqg->dispatched &&
2413 !cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true)) {
2414 cfqq = NULL;
2415 goto keep_queue;
2416 }
2417
2418expire:
2419 cfq_slice_expired(cfqd, 0);
2420new_queue:
2421
2422
2423
2424
2425 if (!new_cfqq)
2426 cfq_choose_cfqg(cfqd);
2427
2428 cfqq = cfq_set_active_queue(cfqd, new_cfqq);
2429keep_queue:
2430 return cfqq;
2431}
2432
2433static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
2434{
2435 int dispatched = 0;
2436
2437 while (cfqq->next_rq) {
2438 cfq_dispatch_insert(cfqq->cfqd->queue, cfqq->next_rq);
2439 dispatched++;
2440 }
2441
2442 BUG_ON(!list_empty(&cfqq->fifo));
2443
2444
2445 __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
2446 return dispatched;
2447}
2448
2449
2450
2451
2452
2453static int cfq_forced_dispatch(struct cfq_data *cfqd)
2454{
2455 struct cfq_queue *cfqq;
2456 int dispatched = 0;
2457
2458
2459 cfq_slice_expired(cfqd, 0);
2460 while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
2461 __cfq_set_active_queue(cfqd, cfqq);
2462 dispatched += __cfq_forced_dispatch_cfqq(cfqq);
2463 }
2464
2465 BUG_ON(cfqd->busy_queues);
2466
2467 cfq_log(cfqd, "forced_dispatch=%d", dispatched);
2468 return dispatched;
2469}
2470
2471static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
2472 struct cfq_queue *cfqq)
2473{
2474
2475 if (cfq_cfqq_slice_new(cfqq))
2476 return true;
2477 if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
2478 cfqq->slice_end))
2479 return true;
2480
2481 return false;
2482}
2483
2484static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2485{
2486 unsigned int max_dispatch;
2487
2488
2489
2490
2491 if (cfq_should_idle(cfqd, cfqq) && cfqd->rq_in_flight[BLK_RW_ASYNC])
2492 return false;
2493
2494
2495
2496
2497 if (cfqd->rq_in_flight[BLK_RW_SYNC] && !cfq_cfqq_sync(cfqq))
2498 return false;
2499
2500 max_dispatch = max_t(unsigned int, cfqd->cfq_quantum / 2, 1);
2501 if (cfq_class_idle(cfqq))
2502 max_dispatch = 1;
2503
2504
2505
2506
2507 if (cfqq->dispatched >= max_dispatch) {
2508 bool promote_sync = false;
2509
2510
2511
2512 if (cfq_class_idle(cfqq))
2513 return false;
2514
2515
2516
2517
2518
2519
2520
2521
2522 if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1)
2523 promote_sync = true;
2524
2525
2526
2527
2528 if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) &&
2529 !promote_sync)
2530 return false;
2531
2532
2533
2534
2535 if (cfqd->busy_queues == 1 || promote_sync)
2536 max_dispatch = -1;
2537 else
2538
2539
2540
2541
2542
2543
2544 max_dispatch = cfqd->cfq_quantum;
2545 }
2546
2547
2548
2549
2550
2551
2552 if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
2553 unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
2554 unsigned int depth;
2555
2556 depth = last_sync / cfqd->cfq_slice[1];
2557 if (!depth && !cfqq->dispatched)
2558 depth = 1;
2559 if (depth < max_dispatch)
2560 max_dispatch = depth;
2561 }
2562
2563
2564
2565
2566 return cfqq->dispatched < max_dispatch;
2567}
2568
2569
2570
2571
2572
2573static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2574{
2575 struct request *rq;
2576
2577 BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
2578
2579 if (!cfq_may_dispatch(cfqd, cfqq))
2580 return false;
2581
2582
2583
2584
2585 rq = cfq_check_fifo(cfqq);
2586 if (!rq)
2587 rq = cfqq->next_rq;
2588
2589
2590
2591
2592 cfq_dispatch_insert(cfqd->queue, rq);
2593
2594 if (!cfqd->active_cic) {
2595 struct cfq_io_context *cic = RQ_CIC(rq);
2596
2597 atomic_long_inc(&cic->ioc->refcount);
2598 cfqd->active_cic = cic;
2599 }
2600
2601 return true;
2602}
2603
2604
2605
2606
2607
2608static int cfq_dispatch_requests(struct request_queue *q, int force)
2609{
2610 struct cfq_data *cfqd = q->elevator->elevator_data;
2611 struct cfq_queue *cfqq;
2612
2613 if (!cfqd->busy_queues)
2614 return 0;
2615
2616 if (unlikely(force))
2617 return cfq_forced_dispatch(cfqd);
2618
2619 cfqq = cfq_select_queue(cfqd);
2620 if (!cfqq)
2621 return 0;
2622
2623
2624
2625
2626 if (!cfq_dispatch_request(cfqd, cfqq))
2627 return 0;
2628
2629 cfqq->slice_dispatch++;
2630 cfq_clear_cfqq_must_dispatch(cfqq);
2631
2632
2633
2634
2635
2636 if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
2637 cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
2638 cfq_class_idle(cfqq))) {
2639 cfqq->slice_end = jiffies + 1;
2640 cfq_slice_expired(cfqd, 0);
2641 }
2642
2643 cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
2644 return 1;
2645}
2646
2647
2648
2649
2650
2651
2652
2653
2654static void cfq_put_queue(struct cfq_queue *cfqq)
2655{
2656 struct cfq_data *cfqd = cfqq->cfqd;
2657 struct cfq_group *cfqg;
2658
2659 BUG_ON(cfqq->ref <= 0);
2660
2661 cfqq->ref--;
2662 if (cfqq->ref)
2663 return;
2664
2665 cfq_log_cfqq(cfqd, cfqq, "put_queue");
2666 BUG_ON(rb_first(&cfqq->sort_list));
2667 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
2668 cfqg = cfqq->cfqg;
2669
2670 if (unlikely(cfqd->active_queue == cfqq)) {
2671 __cfq_slice_expired(cfqd, cfqq, 0);
2672 cfq_schedule_dispatch(cfqd);
2673 }
2674
2675 BUG_ON(cfq_cfqq_on_rr(cfqq));
2676 kmem_cache_free(cfq_pool, cfqq);
2677 cfq_put_cfqg(cfqg);
2678}
2679
2680
2681
2682
2683static void
2684call_for_each_cic(struct io_context *ioc,
2685 void (*func)(struct io_context *, struct cfq_io_context *))
2686{
2687 struct cfq_io_context *cic;
2688 struct hlist_node *n;
2689
2690 rcu_read_lock();
2691
2692 hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
2693 func(ioc, cic);
2694
2695 rcu_read_unlock();
2696}
2697
2698static void cfq_cic_free_rcu(struct rcu_head *head)
2699{
2700 struct cfq_io_context *cic;
2701
2702 cic = container_of(head, struct cfq_io_context, rcu_head);
2703
2704 kmem_cache_free(cfq_ioc_pool, cic);
2705 elv_ioc_count_dec(cfq_ioc_count);
2706
2707 if (ioc_gone) {
2708
2709
2710
2711
2712
2713 spin_lock(&ioc_gone_lock);
2714 if (ioc_gone && !elv_ioc_count_read(cfq_ioc_count)) {
2715 complete(ioc_gone);
2716 ioc_gone = NULL;
2717 }
2718 spin_unlock(&ioc_gone_lock);
2719 }
2720}
2721
2722static void cfq_cic_free(struct cfq_io_context *cic)
2723{
2724 call_rcu(&cic->rcu_head, cfq_cic_free_rcu);
2725}
2726
2727static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
2728{
2729 unsigned long flags;
2730 unsigned long dead_key = (unsigned long) cic->key;
2731
2732 BUG_ON(!(dead_key & CIC_DEAD_KEY));
2733
2734 spin_lock_irqsave(&ioc->lock, flags);
2735 radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
2736 hlist_del_rcu(&cic->cic_list);
2737 spin_unlock_irqrestore(&ioc->lock, flags);
2738
2739 cfq_cic_free(cic);
2740}
2741
2742
2743
2744
2745
2746
2747static void cfq_free_io_context(struct io_context *ioc)
2748{
2749
2750
2751
2752
2753
2754
2755 call_for_each_cic(ioc, cic_free_func);
2756}
2757
2758static void cfq_put_cooperator(struct cfq_queue *cfqq)
2759{
2760 struct cfq_queue *__cfqq, *next;
2761
2762
2763
2764
2765
2766
2767 __cfqq = cfqq->new_cfqq;
2768 while (__cfqq) {
2769 if (__cfqq == cfqq) {
2770 WARN(1, "cfqq->new_cfqq loop detected\n");
2771 break;
2772 }
2773 next = __cfqq->new_cfqq;
2774 cfq_put_queue(__cfqq);
2775 __cfqq = next;
2776 }
2777}
2778
2779static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
2780{
2781 if (unlikely(cfqq == cfqd->active_queue)) {
2782 __cfq_slice_expired(cfqd, cfqq, 0);
2783 cfq_schedule_dispatch(cfqd);
2784 }
2785
2786 cfq_put_cooperator(cfqq);
2787
2788 cfq_put_queue(cfqq);
2789}
2790
2791static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
2792 struct cfq_io_context *cic)
2793{
2794 struct io_context *ioc = cic->ioc;
2795
2796 list_del_init(&cic->queue_list);
2797
2798
2799
2800
2801 smp_wmb();
2802 cic->key = cfqd_dead_key(cfqd);
2803
2804 rcu_read_lock();
2805 if (rcu_dereference(ioc->ioc_data) == cic) {
2806 rcu_read_unlock();
2807 spin_lock(&ioc->lock);
2808 rcu_assign_pointer(ioc->ioc_data, NULL);
2809 spin_unlock(&ioc->lock);
2810 } else
2811 rcu_read_unlock();
2812
2813 if (cic->cfqq[BLK_RW_ASYNC]) {
2814 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]);
2815 cic->cfqq[BLK_RW_ASYNC] = NULL;
2816 }
2817
2818 if (cic->cfqq[BLK_RW_SYNC]) {
2819 cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]);
2820 cic->cfqq[BLK_RW_SYNC] = NULL;
2821 }
2822}
2823
2824static void cfq_exit_single_io_context(struct io_context *ioc,
2825 struct cfq_io_context *cic)
2826{
2827 struct cfq_data *cfqd = cic_to_cfqd(cic);
2828
2829 if (cfqd) {
2830 struct request_queue *q = cfqd->queue;
2831 unsigned long flags;
2832
2833 spin_lock_irqsave(q->queue_lock, flags);
2834
2835
2836
2837
2838
2839 smp_read_barrier_depends();
2840 if (cic->key == cfqd)
2841 __cfq_exit_single_io_context(cfqd, cic);
2842
2843 spin_unlock_irqrestore(q->queue_lock, flags);
2844 }
2845}
2846
2847
2848
2849
2850
2851static void cfq_exit_io_context(struct io_context *ioc)
2852{
2853 call_for_each_cic(ioc, cfq_exit_single_io_context);
2854}
2855
2856static struct cfq_io_context *
2857cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
2858{
2859 struct cfq_io_context *cic;
2860
2861 cic = kmem_cache_alloc_node(cfq_ioc_pool, gfp_mask | __GFP_ZERO,
2862 cfqd->queue->node);
2863 if (cic) {
2864 cic->ttime.last_end_request = jiffies;
2865 INIT_LIST_HEAD(&cic->queue_list);
2866 INIT_HLIST_NODE(&cic->cic_list);
2867 cic->dtor = cfq_free_io_context;
2868 cic->exit = cfq_exit_io_context;
2869 elv_ioc_count_inc(cfq_ioc_count);
2870 }
2871
2872 return cic;
2873}
2874
2875static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
2876{
2877 struct task_struct *tsk = current;
2878 int ioprio_class;
2879
2880 if (!cfq_cfqq_prio_changed(cfqq))
2881 return;
2882
2883 ioprio_class = IOPRIO_PRIO_CLASS(ioc->ioprio);
2884 switch (ioprio_class) {
2885 default:
2886 printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
2887 case IOPRIO_CLASS_NONE:
2888
2889
2890
2891 cfqq->ioprio = task_nice_ioprio(tsk);
2892 cfqq->ioprio_class = task_nice_ioclass(tsk);
2893 break;
2894 case IOPRIO_CLASS_RT:
2895 cfqq->ioprio = task_ioprio(ioc);
2896 cfqq->ioprio_class = IOPRIO_CLASS_RT;
2897 break;
2898 case IOPRIO_CLASS_BE:
2899 cfqq->ioprio = task_ioprio(ioc);
2900 cfqq->ioprio_class = IOPRIO_CLASS_BE;
2901 break;
2902 case IOPRIO_CLASS_IDLE:
2903 cfqq->ioprio_class = IOPRIO_CLASS_IDLE;
2904 cfqq->ioprio = 7;
2905 cfq_clear_cfqq_idle_window(cfqq);
2906 break;
2907 }
2908
2909
2910
2911
2912
2913 cfqq->org_ioprio = cfqq->ioprio;
2914 cfq_clear_cfqq_prio_changed(cfqq);
2915}
2916
2917static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
2918{
2919 struct cfq_data *cfqd = cic_to_cfqd(cic);
2920 struct cfq_queue *cfqq;
2921 unsigned long flags;
2922
2923 if (unlikely(!cfqd))
2924 return;
2925
2926 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2927
2928 cfqq = cic->cfqq[BLK_RW_ASYNC];
2929 if (cfqq) {
2930 struct cfq_queue *new_cfqq;
2931 new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
2932 GFP_ATOMIC);
2933 if (new_cfqq) {
2934 cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
2935 cfq_put_queue(cfqq);
2936 }
2937 }
2938
2939 cfqq = cic->cfqq[BLK_RW_SYNC];
2940 if (cfqq)
2941 cfq_mark_cfqq_prio_changed(cfqq);
2942
2943 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
2944}
2945
2946static void cfq_ioc_set_ioprio(struct io_context *ioc)
2947{
2948 call_for_each_cic(ioc, changed_ioprio);
2949 ioc->ioprio_changed = 0;
2950}
2951
2952static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
2953 pid_t pid, bool is_sync)
2954{
2955 RB_CLEAR_NODE(&cfqq->rb_node);
2956 RB_CLEAR_NODE(&cfqq->p_node);
2957 INIT_LIST_HEAD(&cfqq->fifo);
2958
2959 cfqq->ref = 0;
2960 cfqq->cfqd = cfqd;
2961
2962 cfq_mark_cfqq_prio_changed(cfqq);
2963
2964 if (is_sync) {
2965 if (!cfq_class_idle(cfqq))
2966 cfq_mark_cfqq_idle_window(cfqq);
2967 cfq_mark_cfqq_sync(cfqq);
2968 }
2969 cfqq->pid = pid;
2970}
2971
2972#ifdef CONFIG_CFQ_GROUP_IOSCHED
2973static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
2974{
2975 struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
2976 struct cfq_data *cfqd = cic_to_cfqd(cic);
2977 unsigned long flags;
2978 struct request_queue *q;
2979
2980 if (unlikely(!cfqd))
2981 return;
2982
2983 q = cfqd->queue;
2984
2985 spin_lock_irqsave(q->queue_lock, flags);
2986
2987 if (sync_cfqq) {
2988
2989
2990
2991
2992 cfq_log_cfqq(cfqd, sync_cfqq, "changed cgroup");
2993 cic_set_cfqq(cic, NULL, 1);
2994 cfq_put_queue(sync_cfqq);
2995 }
2996
2997 spin_unlock_irqrestore(q->queue_lock, flags);
2998}
2999
3000static void cfq_ioc_set_cgroup(struct io_context *ioc)
3001{
3002 call_for_each_cic(ioc, changed_cgroup);
3003 ioc->cgroup_changed = 0;
3004}
3005#endif
3006
3007static struct cfq_queue *
3008cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
3009 struct io_context *ioc, gfp_t gfp_mask)
3010{
3011 struct cfq_queue *cfqq, *new_cfqq = NULL;
3012 struct cfq_io_context *cic;
3013 struct cfq_group *cfqg;
3014
3015retry:
3016 cfqg = cfq_get_cfqg(cfqd);
3017 cic = cfq_cic_lookup(cfqd, ioc);
3018
3019 cfqq = cic_to_cfqq(cic, is_sync);
3020
3021
3022
3023
3024
3025 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
3026 cfqq = NULL;
3027 if (new_cfqq) {
3028 cfqq = new_cfqq;
3029 new_cfqq = NULL;
3030 } else if (gfp_mask & __GFP_WAIT) {
3031 spin_unlock_irq(cfqd->queue->queue_lock);
3032 new_cfqq = kmem_cache_alloc_node(cfq_pool,
3033 gfp_mask | __GFP_ZERO,
3034 cfqd->queue->node);
3035 spin_lock_irq(cfqd->queue->queue_lock);
3036 if (new_cfqq)
3037 goto retry;
3038 } else {
3039 cfqq = kmem_cache_alloc_node(cfq_pool,
3040 gfp_mask | __GFP_ZERO,
3041 cfqd->queue->node);
3042 }
3043
3044 if (cfqq) {
3045 cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
3046 cfq_init_prio_data(cfqq, ioc);
3047 cfq_link_cfqq_cfqg(cfqq, cfqg);
3048 cfq_log_cfqq(cfqd, cfqq, "alloced");
3049 } else
3050 cfqq = &cfqd->oom_cfqq;
3051 }
3052
3053 if (new_cfqq)
3054 kmem_cache_free(cfq_pool, new_cfqq);
3055
3056 return cfqq;
3057}
3058
3059static struct cfq_queue **
3060cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
3061{
3062 switch (ioprio_class) {
3063 case IOPRIO_CLASS_RT:
3064 return &cfqd->async_cfqq[0][ioprio];
3065 case IOPRIO_CLASS_BE:
3066 return &cfqd->async_cfqq[1][ioprio];
3067 case IOPRIO_CLASS_IDLE:
3068 return &cfqd->async_idle_cfqq;
3069 default:
3070 BUG();
3071 }
3072}
3073
3074static struct cfq_queue *
3075cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
3076 gfp_t gfp_mask)
3077{
3078 const int ioprio = task_ioprio(ioc);
3079 const int ioprio_class = task_ioprio_class(ioc);
3080 struct cfq_queue **async_cfqq = NULL;
3081 struct cfq_queue *cfqq = NULL;
3082
3083 if (!is_sync) {
3084 async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
3085 cfqq = *async_cfqq;
3086 }
3087
3088 if (!cfqq)
3089 cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
3090
3091
3092
3093
3094 if (!is_sync && !(*async_cfqq)) {
3095 cfqq->ref++;
3096 *async_cfqq = cfqq;
3097 }
3098
3099 cfqq->ref++;
3100 return cfqq;
3101}
3102
3103
3104
3105
3106static void
3107cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
3108 struct cfq_io_context *cic)
3109{
3110 unsigned long flags;
3111
3112 WARN_ON(!list_empty(&cic->queue_list));
3113 BUG_ON(cic->key != cfqd_dead_key(cfqd));
3114
3115 spin_lock_irqsave(&ioc->lock, flags);
3116
3117 BUG_ON(rcu_dereference_check(ioc->ioc_data,
3118 lockdep_is_held(&ioc->lock)) == cic);
3119
3120 radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
3121 hlist_del_rcu(&cic->cic_list);
3122 spin_unlock_irqrestore(&ioc->lock, flags);
3123
3124 cfq_cic_free(cic);
3125}
3126
3127static struct cfq_io_context *
3128cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
3129{
3130 struct cfq_io_context *cic;
3131 unsigned long flags;
3132
3133 if (unlikely(!ioc))
3134 return NULL;
3135
3136 rcu_read_lock();
3137
3138
3139
3140
3141 cic = rcu_dereference(ioc->ioc_data);
3142 if (cic && cic->key == cfqd) {
3143 rcu_read_unlock();
3144 return cic;
3145 }
3146
3147 do {
3148 cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
3149 rcu_read_unlock();
3150 if (!cic)
3151 break;
3152 if (unlikely(cic->key != cfqd)) {
3153 cfq_drop_dead_cic(cfqd, ioc, cic);
3154 rcu_read_lock();
3155 continue;
3156 }
3157
3158 spin_lock_irqsave(&ioc->lock, flags);
3159 rcu_assign_pointer(ioc->ioc_data, cic);
3160 spin_unlock_irqrestore(&ioc->lock, flags);
3161 break;
3162 } while (1);
3163
3164 return cic;
3165}
3166
3167
3168
3169
3170
3171
3172static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
3173 struct cfq_io_context *cic, gfp_t gfp_mask)
3174{
3175 unsigned long flags;
3176 int ret;
3177
3178 ret = radix_tree_preload(gfp_mask);
3179 if (!ret) {
3180 cic->ioc = ioc;
3181 cic->key = cfqd;
3182
3183 spin_lock_irqsave(&ioc->lock, flags);
3184 ret = radix_tree_insert(&ioc->radix_root,
3185 cfqd->cic_index, cic);
3186 if (!ret)
3187 hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
3188 spin_unlock_irqrestore(&ioc->lock, flags);
3189
3190 radix_tree_preload_end();
3191
3192 if (!ret) {
3193 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
3194 list_add(&cic->queue_list, &cfqd->cic_list);
3195 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
3196 }
3197 }
3198
3199 if (ret && ret != -EEXIST)
3200 printk(KERN_ERR "cfq: cic link failed!\n");
3201
3202 return ret;
3203}
3204
3205
3206
3207
3208
3209
3210static struct cfq_io_context *
3211cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
3212{
3213 struct io_context *ioc = NULL;
3214 struct cfq_io_context *cic;
3215 int ret;
3216
3217 might_sleep_if(gfp_mask & __GFP_WAIT);
3218
3219 ioc = get_io_context(gfp_mask, cfqd->queue->node);
3220 if (!ioc)
3221 return NULL;
3222
3223retry:
3224 cic = cfq_cic_lookup(cfqd, ioc);
3225 if (cic)
3226 goto out;
3227
3228 cic = cfq_alloc_io_context(cfqd, gfp_mask);
3229 if (cic == NULL)
3230 goto err;
3231
3232 ret = cfq_cic_link(cfqd, ioc, cic, gfp_mask);
3233 if (ret == -EEXIST) {
3234
3235 cfq_cic_free(cic);
3236 goto retry;
3237 } else if (ret)
3238 goto err_free;
3239
3240out:
3241 smp_read_barrier_depends();
3242 if (unlikely(ioc->ioprio_changed))
3243 cfq_ioc_set_ioprio(ioc);
3244
3245#ifdef CONFIG_CFQ_GROUP_IOSCHED
3246 if (unlikely(ioc->cgroup_changed))
3247 cfq_ioc_set_cgroup(ioc);
3248#endif
3249 return cic;
3250err_free:
3251 cfq_cic_free(cic);
3252err:
3253 put_io_context(ioc);
3254 return NULL;
3255}
3256
3257static void
3258__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
3259{
3260 unsigned long elapsed = jiffies - ttime->last_end_request;
3261 elapsed = min(elapsed, 2UL * slice_idle);
3262
3263 ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
3264 ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
3265 ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
3266}
3267
3268static void
3269cfq_update_io_thinktime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3270 struct cfq_io_context *cic)
3271{
3272 if (cfq_cfqq_sync(cfqq)) {
3273 __cfq_update_io_thinktime(&cic->ttime, cfqd->cfq_slice_idle);
3274 __cfq_update_io_thinktime(&cfqq->service_tree->ttime,
3275 cfqd->cfq_slice_idle);
3276 }
3277#ifdef CONFIG_CFQ_GROUP_IOSCHED
3278 __cfq_update_io_thinktime(&cfqq->cfqg->ttime, cfqd->cfq_group_idle);
3279#endif
3280}
3281
3282static void
3283cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3284 struct request *rq)
3285{
3286 sector_t sdist = 0;
3287 sector_t n_sec = blk_rq_sectors(rq);
3288 if (cfqq->last_request_pos) {
3289 if (cfqq->last_request_pos < blk_rq_pos(rq))
3290 sdist = blk_rq_pos(rq) - cfqq->last_request_pos;
3291 else
3292 sdist = cfqq->last_request_pos - blk_rq_pos(rq);
3293 }
3294
3295 cfqq->seek_history <<= 1;
3296 if (blk_queue_nonrot(cfqd->queue))
3297 cfqq->seek_history |= (n_sec < CFQQ_SECT_THR_NONROT);
3298 else
3299 cfqq->seek_history |= (sdist > CFQQ_SEEK_THR);
3300}
3301
3302
3303
3304
3305
3306static void
3307cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3308 struct cfq_io_context *cic)
3309{
3310 int old_idle, enable_idle;
3311
3312
3313
3314
3315 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
3316 return;
3317
3318 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
3319
3320 if (cfqq->queued[0] + cfqq->queued[1] >= 4)
3321 cfq_mark_cfqq_deep(cfqq);
3322
3323 if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE))
3324 enable_idle = 0;
3325 else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
3326 (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq)))
3327 enable_idle = 0;
3328 else if (sample_valid(cic->ttime.ttime_samples)) {
3329 if (cic->ttime.ttime_mean > cfqd->cfq_slice_idle)
3330 enable_idle = 0;
3331 else
3332 enable_idle = 1;
3333 }
3334
3335 if (old_idle != enable_idle) {
3336 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
3337 if (enable_idle)
3338 cfq_mark_cfqq_idle_window(cfqq);
3339 else
3340 cfq_clear_cfqq_idle_window(cfqq);
3341 }
3342}
3343
3344
3345
3346
3347
3348static bool
3349cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3350 struct request *rq)
3351{
3352 struct cfq_queue *cfqq;
3353
3354 cfqq = cfqd->active_queue;
3355 if (!cfqq)
3356 return false;
3357
3358 if (cfq_class_idle(new_cfqq))
3359 return false;
3360
3361 if (cfq_class_idle(cfqq))
3362 return true;
3363
3364
3365
3366
3367 if (cfq_class_rt(cfqq) && !cfq_class_rt(new_cfqq))
3368 return false;
3369
3370
3371
3372
3373
3374 if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
3375 return true;
3376
3377 if (new_cfqq->cfqg != cfqq->cfqg)
3378 return false;
3379
3380 if (cfq_slice_used(cfqq))
3381 return true;
3382
3383
3384 if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD &&
3385 cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
3386 new_cfqq->service_tree->count == 2 &&
3387 RB_EMPTY_ROOT(&cfqq->sort_list))
3388 return true;
3389
3390
3391
3392
3393
3394 if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
3395 return true;
3396
3397
3398
3399
3400 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
3401 return true;
3402
3403
3404 if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
3405 return true;
3406
3407 if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
3408 return false;
3409
3410
3411
3412
3413
3414 if (cfq_rq_close(cfqd, cfqq, rq))
3415 return true;
3416
3417 return false;
3418}
3419
3420
3421
3422
3423
3424static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3425{
3426 struct cfq_queue *old_cfqq = cfqd->active_queue;
3427
3428 cfq_log_cfqq(cfqd, cfqq, "preempt");
3429 cfq_slice_expired(cfqd, 1);
3430
3431
3432
3433
3434
3435 if (cfqq_type(old_cfqq) != cfqq_type(cfqq))
3436 cfqq->cfqg->saved_workload_slice = 0;
3437
3438
3439
3440
3441
3442 BUG_ON(!cfq_cfqq_on_rr(cfqq));
3443
3444 cfq_service_tree_add(cfqd, cfqq, 1);
3445
3446 cfqq->slice_end = 0;
3447 cfq_mark_cfqq_slice_new(cfqq);
3448}
3449
3450
3451
3452
3453
3454static void
3455cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3456 struct request *rq)
3457{
3458 struct cfq_io_context *cic = RQ_CIC(rq);
3459
3460 cfqd->rq_queued++;
3461 if (rq->cmd_flags & REQ_PRIO)
3462 cfqq->prio_pending++;
3463
3464 cfq_update_io_thinktime(cfqd, cfqq, cic);
3465 cfq_update_io_seektime(cfqd, cfqq, rq);
3466 cfq_update_idle_window(cfqd, cfqq, cic);
3467
3468 cfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
3469
3470 if (cfqq == cfqd->active_queue) {
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481 if (cfq_cfqq_wait_request(cfqq)) {
3482 if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
3483 cfqd->busy_queues > 1) {
3484 cfq_del_timer(cfqd, cfqq);
3485 cfq_clear_cfqq_wait_request(cfqq);
3486 __blk_run_queue(cfqd->queue);
3487 } else {
3488 cfq_blkiocg_update_idle_time_stats(
3489 &cfqq->cfqg->blkg);
3490 cfq_mark_cfqq_must_dispatch(cfqq);
3491 }
3492 }
3493 } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
3494
3495
3496
3497
3498
3499
3500 cfq_preempt_queue(cfqd, cfqq);
3501 __blk_run_queue(cfqd->queue);
3502 }
3503}
3504
3505static void cfq_insert_request(struct request_queue *q, struct request *rq)
3506{
3507 struct cfq_data *cfqd = q->elevator->elevator_data;
3508 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3509
3510 cfq_log_cfqq(cfqd, cfqq, "insert_request");
3511 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
3512
3513 rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
3514 list_add_tail(&rq->queuelist, &cfqq->fifo);
3515 cfq_add_rq_rb(rq);
3516 cfq_blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg,
3517 &cfqd->serving_group->blkg, rq_data_dir(rq),
3518 rq_is_sync(rq));
3519 cfq_rq_enqueued(cfqd, cfqq, rq);
3520}
3521
3522
3523
3524
3525
3526static void cfq_update_hw_tag(struct cfq_data *cfqd)
3527{
3528 struct cfq_queue *cfqq = cfqd->active_queue;
3529
3530 if (cfqd->rq_in_driver > cfqd->hw_tag_est_depth)
3531 cfqd->hw_tag_est_depth = cfqd->rq_in_driver;
3532
3533 if (cfqd->hw_tag == 1)
3534 return;
3535
3536 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
3537 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
3538 return;
3539
3540
3541
3542
3543
3544
3545 if (cfqq && cfq_cfqq_idle_window(cfqq) &&
3546 cfqq->dispatched + cfqq->queued[0] + cfqq->queued[1] <
3547 CFQ_HW_QUEUE_MIN && cfqd->rq_in_driver < CFQ_HW_QUEUE_MIN)
3548 return;
3549
3550 if (cfqd->hw_tag_samples++ < 50)
3551 return;
3552
3553 if (cfqd->hw_tag_est_depth >= CFQ_HW_QUEUE_MIN)
3554 cfqd->hw_tag = 1;
3555 else
3556 cfqd->hw_tag = 0;
3557}
3558
3559static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
3560{
3561 struct cfq_io_context *cic = cfqd->active_cic;
3562
3563
3564 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
3565 return false;
3566
3567
3568 if (cfqq->cfqg->nr_cfqq > 1)
3569 return false;
3570
3571
3572 if (cfq_io_thinktime_big(cfqd, &cfqq->cfqg->ttime, true))
3573 return false;
3574
3575 if (cfq_slice_used(cfqq))
3576 return true;
3577
3578
3579 if (cic && sample_valid(cic->ttime.ttime_samples)
3580 && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
3581 return true;
3582
3583
3584
3585
3586
3587
3588
3589
3590 if (cfqq->slice_end - jiffies == 1)
3591 return true;
3592
3593 return false;
3594}
3595
3596static void cfq_completed_request(struct request_queue *q, struct request *rq)
3597{
3598 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3599 struct cfq_data *cfqd = cfqq->cfqd;
3600 const int sync = rq_is_sync(rq);
3601 unsigned long now;
3602
3603 now = jiffies;
3604 cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
3605 !!(rq->cmd_flags & REQ_NOIDLE));
3606
3607 cfq_update_hw_tag(cfqd);
3608
3609 WARN_ON(!cfqd->rq_in_driver);
3610 WARN_ON(!cfqq->dispatched);
3611 cfqd->rq_in_driver--;
3612 cfqq->dispatched--;
3613 (RQ_CFQG(rq))->dispatched--;
3614 cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg,
3615 rq_start_time_ns(rq), rq_io_start_time_ns(rq),
3616 rq_data_dir(rq), rq_is_sync(rq));
3617
3618 cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
3619
3620 if (sync) {
3621 struct cfq_rb_root *service_tree;
3622
3623 RQ_CIC(rq)->ttime.last_end_request = now;
3624
3625 if (cfq_cfqq_on_rr(cfqq))
3626 service_tree = cfqq->service_tree;
3627 else
3628 service_tree = service_tree_for(cfqq->cfqg,
3629 cfqq_prio(cfqq), cfqq_type(cfqq));
3630 service_tree->ttime.last_end_request = now;
3631 if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
3632 cfqd->last_delayed_sync = now;
3633 }
3634
3635#ifdef CONFIG_CFQ_GROUP_IOSCHED
3636 cfqq->cfqg->ttime.last_end_request = now;
3637#endif
3638
3639
3640
3641
3642
3643 if (cfqd->active_queue == cfqq) {
3644 const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list);
3645
3646 if (cfq_cfqq_slice_new(cfqq)) {
3647 cfq_set_prio_slice(cfqd, cfqq);
3648 cfq_clear_cfqq_slice_new(cfqq);
3649 }
3650
3651
3652
3653
3654
3655 if (cfq_should_wait_busy(cfqd, cfqq)) {
3656 unsigned long extend_sl = cfqd->cfq_slice_idle;
3657 if (!cfqd->cfq_slice_idle)
3658 extend_sl = cfqd->cfq_group_idle;
3659 cfqq->slice_end = jiffies + extend_sl;
3660 cfq_mark_cfqq_wait_busy(cfqq);
3661 cfq_log_cfqq(cfqd, cfqq, "will busy wait");
3662 }
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
3673 cfq_slice_expired(cfqd, 1);
3674 else if (sync && cfqq_empty &&
3675 !cfq_close_cooperator(cfqd, cfqq)) {
3676 cfq_arm_slice_timer(cfqd);
3677 }
3678 }
3679
3680 if (!cfqd->rq_in_driver)
3681 cfq_schedule_dispatch(cfqd);
3682}
3683
3684static inline int __cfq_may_queue(struct cfq_queue *cfqq)
3685{
3686 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
3687 cfq_mark_cfqq_must_alloc_slice(cfqq);
3688 return ELV_MQUEUE_MUST;
3689 }
3690
3691 return ELV_MQUEUE_MAY;
3692}
3693
3694static int cfq_may_queue(struct request_queue *q, int rw)
3695{
3696 struct cfq_data *cfqd = q->elevator->elevator_data;
3697 struct task_struct *tsk = current;
3698 struct cfq_io_context *cic;
3699 struct cfq_queue *cfqq;
3700
3701
3702
3703
3704
3705
3706
3707 cic = cfq_cic_lookup(cfqd, tsk->io_context);
3708 if (!cic)
3709 return ELV_MQUEUE_MAY;
3710
3711 cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
3712 if (cfqq) {
3713 cfq_init_prio_data(cfqq, cic->ioc);
3714
3715 return __cfq_may_queue(cfqq);
3716 }
3717
3718 return ELV_MQUEUE_MAY;
3719}
3720
3721
3722
3723
3724static void cfq_put_request(struct request *rq)
3725{
3726 struct cfq_queue *cfqq = RQ_CFQQ(rq);
3727
3728 if (cfqq) {
3729 const int rw = rq_data_dir(rq);
3730
3731 BUG_ON(!cfqq->allocated[rw]);
3732 cfqq->allocated[rw]--;
3733
3734 put_io_context(RQ_CIC(rq)->ioc);
3735
3736 rq->elevator_private[0] = NULL;
3737 rq->elevator_private[1] = NULL;
3738
3739
3740 cfq_put_cfqg(RQ_CFQG(rq));
3741 rq->elevator_private[2] = NULL;
3742
3743 cfq_put_queue(cfqq);
3744 }
3745}
3746
3747static struct cfq_queue *
3748cfq_merge_cfqqs(struct cfq_data *cfqd, struct cfq_io_context *cic,
3749 struct cfq_queue *cfqq)
3750{
3751 cfq_log_cfqq(cfqd, cfqq, "merging with queue %p", cfqq->new_cfqq);
3752 cic_set_cfqq(cic, cfqq->new_cfqq, 1);
3753 cfq_mark_cfqq_coop(cfqq->new_cfqq);
3754 cfq_put_queue(cfqq);
3755 return cic_to_cfqq(cic, 1);
3756}
3757
3758
3759
3760
3761
3762static struct cfq_queue *
3763split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
3764{
3765 if (cfqq_process_refs(cfqq) == 1) {
3766 cfqq->pid = current->pid;
3767 cfq_clear_cfqq_coop(cfqq);
3768 cfq_clear_cfqq_split_coop(cfqq);
3769 return cfqq;
3770 }
3771
3772 cic_set_cfqq(cic, NULL, 1);
3773
3774 cfq_put_cooperator(cfqq);
3775
3776 cfq_put_queue(cfqq);
3777 return NULL;
3778}
3779
3780
3781
3782static int
3783cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
3784{
3785 struct cfq_data *cfqd = q->elevator->elevator_data;
3786 struct cfq_io_context *cic;
3787 const int rw = rq_data_dir(rq);
3788 const bool is_sync = rq_is_sync(rq);
3789 struct cfq_queue *cfqq;
3790 unsigned long flags;
3791
3792 might_sleep_if(gfp_mask & __GFP_WAIT);
3793
3794 cic = cfq_get_io_context(cfqd, gfp_mask);
3795
3796 spin_lock_irqsave(q->queue_lock, flags);
3797
3798 if (!cic)
3799 goto queue_fail;
3800
3801new_queue:
3802 cfqq = cic_to_cfqq(cic, is_sync);
3803 if (!cfqq || cfqq == &cfqd->oom_cfqq) {
3804 cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
3805 cic_set_cfqq(cic, cfqq, is_sync);
3806 } else {
3807
3808
3809
3810 if (cfq_cfqq_coop(cfqq) && cfq_cfqq_split_coop(cfqq)) {
3811 cfq_log_cfqq(cfqd, cfqq, "breaking apart cfqq");
3812 cfqq = split_cfqq(cic, cfqq);
3813 if (!cfqq)
3814 goto new_queue;
3815 }
3816
3817
3818
3819
3820
3821
3822
3823 if (cfqq->new_cfqq)
3824 cfqq = cfq_merge_cfqqs(cfqd, cic, cfqq);
3825 }
3826
3827 cfqq->allocated[rw]++;
3828
3829 cfqq->ref++;
3830 rq->elevator_private[0] = cic;
3831 rq->elevator_private[1] = cfqq;
3832 rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
3833 spin_unlock_irqrestore(q->queue_lock, flags);
3834 return 0;
3835
3836queue_fail:
3837 cfq_schedule_dispatch(cfqd);
3838 spin_unlock_irqrestore(q->queue_lock, flags);
3839 cfq_log(cfqd, "set_request fail");
3840 return 1;
3841}
3842
3843static void cfq_kick_queue(struct work_struct *work)
3844{
3845 struct cfq_data *cfqd =
3846 container_of(work, struct cfq_data, unplug_work);
3847 struct request_queue *q = cfqd->queue;
3848
3849 spin_lock_irq(q->queue_lock);
3850 __blk_run_queue(cfqd->queue);
3851 spin_unlock_irq(q->queue_lock);
3852}
3853
3854
3855
3856
3857static void cfq_idle_slice_timer(unsigned long data)
3858{
3859 struct cfq_data *cfqd = (struct cfq_data *) data;
3860 struct cfq_queue *cfqq;
3861 unsigned long flags;
3862 int timed_out = 1;
3863
3864 cfq_log(cfqd, "idle timer fired");
3865
3866 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
3867
3868 cfqq = cfqd->active_queue;
3869 if (cfqq) {
3870 timed_out = 0;
3871
3872
3873
3874
3875 if (cfq_cfqq_must_dispatch(cfqq))
3876 goto out_kick;
3877
3878
3879
3880
3881 if (cfq_slice_used(cfqq))
3882 goto expire;
3883
3884
3885
3886
3887
3888 if (!cfqd->busy_queues)
3889 goto out_cont;
3890
3891
3892
3893
3894 if (!RB_EMPTY_ROOT(&cfqq->sort_list))
3895 goto out_kick;
3896
3897
3898
3899
3900 cfq_clear_cfqq_deep(cfqq);
3901 }
3902expire:
3903 cfq_slice_expired(cfqd, timed_out);
3904out_kick:
3905 cfq_schedule_dispatch(cfqd);
3906out_cont:
3907 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
3908}
3909
3910static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
3911{
3912 del_timer_sync(&cfqd->idle_slice_timer);
3913 cancel_work_sync(&cfqd->unplug_work);
3914}
3915
3916static void cfq_put_async_queues(struct cfq_data *cfqd)
3917{
3918 int i;
3919
3920 for (i = 0; i < IOPRIO_BE_NR; i++) {
3921 if (cfqd->async_cfqq[0][i])
3922 cfq_put_queue(cfqd->async_cfqq[0][i]);
3923 if (cfqd->async_cfqq[1][i])
3924 cfq_put_queue(cfqd->async_cfqq[1][i]);
3925 }
3926
3927 if (cfqd->async_idle_cfqq)
3928 cfq_put_queue(cfqd->async_idle_cfqq);
3929}
3930
3931static void cfq_exit_queue(struct elevator_queue *e)
3932{
3933 struct cfq_data *cfqd = e->elevator_data;
3934 struct request_queue *q = cfqd->queue;
3935 bool wait = false;
3936
3937 cfq_shutdown_timer_wq(cfqd);
3938
3939 spin_lock_irq(q->queue_lock);
3940
3941 if (cfqd->active_queue)
3942 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
3943
3944 while (!list_empty(&cfqd->cic_list)) {
3945 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
3946 struct cfq_io_context,
3947 queue_list);
3948
3949 __cfq_exit_single_io_context(cfqd, cic);
3950 }
3951
3952 cfq_put_async_queues(cfqd);
3953 cfq_release_cfq_groups(cfqd);
3954
3955
3956
3957
3958
3959 if (cfqd->nr_blkcg_linked_grps)
3960 wait = true;
3961
3962 spin_unlock_irq(q->queue_lock);
3963
3964 cfq_shutdown_timer_wq(cfqd);
3965
3966 spin_lock(&cic_index_lock);
3967 ida_remove(&cic_index_ida, cfqd->cic_index);
3968 spin_unlock(&cic_index_lock);
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981 if (wait)
3982 synchronize_rcu();
3983
3984#ifdef CONFIG_CFQ_GROUP_IOSCHED
3985
3986 free_percpu(cfqd->root_group.blkg.stats_cpu);
3987#endif
3988 kfree(cfqd);
3989}
3990
3991static int cfq_alloc_cic_index(void)
3992{
3993 int index, error;
3994
3995 do {
3996 if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
3997 return -ENOMEM;
3998
3999 spin_lock(&cic_index_lock);
4000 error = ida_get_new(&cic_index_ida, &index);
4001 spin_unlock(&cic_index_lock);
4002 if (error && error != -EAGAIN)
4003 return error;
4004 } while (error);
4005
4006 return index;
4007}
4008
4009static void *cfq_init_queue(struct request_queue *q)
4010{
4011 struct cfq_data *cfqd;
4012 int i, j;
4013 struct cfq_group *cfqg;
4014 struct cfq_rb_root *st;
4015
4016 i = cfq_alloc_cic_index();
4017 if (i < 0)
4018 return NULL;
4019
4020 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
4021 if (!cfqd) {
4022 spin_lock(&cic_index_lock);
4023 ida_remove(&cic_index_ida, i);
4024 spin_unlock(&cic_index_lock);
4025 return NULL;
4026 }
4027
4028
4029
4030
4031
4032 cfqd->cic_index = i;
4033
4034
4035 cfqd->grp_service_tree = CFQ_RB_ROOT;
4036
4037
4038 cfqg = &cfqd->root_group;
4039 for_each_cfqg_st(cfqg, i, j, st)
4040 *st = CFQ_RB_ROOT;
4041 RB_CLEAR_NODE(&cfqg->rb_node);
4042
4043
4044 cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
4045
4046#ifdef CONFIG_CFQ_GROUP_IOSCHED
4047
4048
4049
4050
4051
4052
4053
4054 cfqg->ref = 2;
4055
4056 if (blkio_alloc_blkg_stats(&cfqg->blkg)) {
4057 kfree(cfqg);
4058
4059 spin_lock(&cic_index_lock);
4060 ida_remove(&cic_index_ida, cfqd->cic_index);
4061 spin_unlock(&cic_index_lock);
4062
4063 kfree(cfqd);
4064 return NULL;
4065 }
4066
4067 rcu_read_lock();
4068
4069 cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
4070 (void *)cfqd, 0);
4071 rcu_read_unlock();
4072 cfqd->nr_blkcg_linked_grps++;
4073
4074
4075 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
4076#endif
4077
4078
4079
4080
4081
4082 for (i = 0; i < CFQ_PRIO_LISTS; i++)
4083 cfqd->prio_trees[i] = RB_ROOT;
4084
4085
4086
4087
4088
4089
4090 cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
4091 cfqd->oom_cfqq.ref++;
4092 cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
4093
4094 INIT_LIST_HEAD(&cfqd->cic_list);
4095
4096 cfqd->queue = q;
4097
4098 init_timer(&cfqd->idle_slice_timer);
4099 cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
4100 cfqd->idle_slice_timer.data = (unsigned long) cfqd;
4101
4102 INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
4103
4104 cfqd->cfq_quantum = cfq_quantum;
4105 cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
4106 cfqd->cfq_fifo_expire[1] = cfq_fifo_expire[1];
4107 cfqd->cfq_back_max = cfq_back_max;
4108 cfqd->cfq_back_penalty = cfq_back_penalty;
4109 cfqd->cfq_slice[0] = cfq_slice_async;
4110 cfqd->cfq_slice[1] = cfq_slice_sync;
4111 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
4112 cfqd->cfq_slice_idle = cfq_slice_idle;
4113 cfqd->cfq_group_idle = cfq_group_idle;
4114 cfqd->cfq_latency = 1;
4115 cfqd->hw_tag = -1;
4116
4117
4118
4119
4120 cfqd->last_delayed_sync = jiffies - HZ;
4121 return cfqd;
4122}
4123
4124static void cfq_slab_kill(void)
4125{
4126
4127
4128
4129
4130 if (cfq_pool)
4131 kmem_cache_destroy(cfq_pool);
4132 if (cfq_ioc_pool)
4133 kmem_cache_destroy(cfq_ioc_pool);
4134}
4135
4136static int __init cfq_slab_setup(void)
4137{
4138 cfq_pool = KMEM_CACHE(cfq_queue, 0);
4139 if (!cfq_pool)
4140 goto fail;
4141
4142 cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0);
4143 if (!cfq_ioc_pool)
4144 goto fail;
4145
4146 return 0;
4147fail:
4148 cfq_slab_kill();
4149 return -ENOMEM;
4150}
4151
4152
4153
4154
4155static ssize_t
4156cfq_var_show(unsigned int var, char *page)
4157{
4158 return sprintf(page, "%d\n", var);
4159}
4160
4161static ssize_t
4162cfq_var_store(unsigned int *var, const char *page, size_t count)
4163{
4164 char *p = (char *) page;
4165
4166 *var = simple_strtoul(p, &p, 10);
4167 return count;
4168}
4169
4170#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
4171static ssize_t __FUNC(struct elevator_queue *e, char *page) \
4172{ \
4173 struct cfq_data *cfqd = e->elevator_data; \
4174 unsigned int __data = __VAR; \
4175 if (__CONV) \
4176 __data = jiffies_to_msecs(__data); \
4177 return cfq_var_show(__data, (page)); \
4178}
4179SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
4180SHOW_FUNCTION(cfq_fifo_expire_sync_show, cfqd->cfq_fifo_expire[1], 1);
4181SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1);
4182SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0);
4183SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0);
4184SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
4185SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1);
4186SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
4187SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
4188SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
4189SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
4190#undef SHOW_FUNCTION
4191
4192#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
4193static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
4194{ \
4195 struct cfq_data *cfqd = e->elevator_data; \
4196 unsigned int __data; \
4197 int ret = cfq_var_store(&__data, (page), count); \
4198 if (__data < (MIN)) \
4199 __data = (MIN); \
4200 else if (__data > (MAX)) \
4201 __data = (MAX); \
4202 if (__CONV) \
4203 *(__PTR) = msecs_to_jiffies(__data); \
4204 else \
4205 *(__PTR) = __data; \
4206 return ret; \
4207}
4208STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, UINT_MAX, 0);
4209STORE_FUNCTION(cfq_fifo_expire_sync_store, &cfqd->cfq_fifo_expire[1], 1,
4210 UINT_MAX, 1);
4211STORE_FUNCTION(cfq_fifo_expire_async_store, &cfqd->cfq_fifo_expire[0], 1,
4212 UINT_MAX, 1);
4213STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0);
4214STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1,
4215 UINT_MAX, 0);
4216STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1);
4217STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1);
4218STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
4219STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
4220STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
4221 UINT_MAX, 0);
4222STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
4223#undef STORE_FUNCTION
4224
4225#define CFQ_ATTR(name) \
4226 __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
4227
4228static struct elv_fs_entry cfq_attrs[] = {
4229 CFQ_ATTR(quantum),
4230 CFQ_ATTR(fifo_expire_sync),
4231 CFQ_ATTR(fifo_expire_async),
4232 CFQ_ATTR(back_seek_max),
4233 CFQ_ATTR(back_seek_penalty),
4234 CFQ_ATTR(slice_sync),
4235 CFQ_ATTR(slice_async),
4236 CFQ_ATTR(slice_async_rq),
4237 CFQ_ATTR(slice_idle),
4238 CFQ_ATTR(group_idle),
4239 CFQ_ATTR(low_latency),
4240 __ATTR_NULL
4241};
4242
4243static struct elevator_type iosched_cfq = {
4244 .ops = {
4245 .elevator_merge_fn = cfq_merge,
4246 .elevator_merged_fn = cfq_merged_request,
4247 .elevator_merge_req_fn = cfq_merged_requests,
4248 .elevator_allow_merge_fn = cfq_allow_merge,
4249 .elevator_bio_merged_fn = cfq_bio_merged,
4250 .elevator_dispatch_fn = cfq_dispatch_requests,
4251 .elevator_add_req_fn = cfq_insert_request,
4252 .elevator_activate_req_fn = cfq_activate_request,
4253 .elevator_deactivate_req_fn = cfq_deactivate_request,
4254 .elevator_completed_req_fn = cfq_completed_request,
4255 .elevator_former_req_fn = elv_rb_former_request,
4256 .elevator_latter_req_fn = elv_rb_latter_request,
4257 .elevator_set_req_fn = cfq_set_request,
4258 .elevator_put_req_fn = cfq_put_request,
4259 .elevator_may_queue_fn = cfq_may_queue,
4260 .elevator_init_fn = cfq_init_queue,
4261 .elevator_exit_fn = cfq_exit_queue,
4262 .trim = cfq_free_io_context,
4263 },
4264 .elevator_attrs = cfq_attrs,
4265 .elevator_name = "cfq",
4266 .elevator_owner = THIS_MODULE,
4267};
4268
4269#ifdef CONFIG_CFQ_GROUP_IOSCHED
4270static struct blkio_policy_type blkio_policy_cfq = {
4271 .ops = {
4272 .blkio_unlink_group_fn = cfq_unlink_blkio_group,
4273 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
4274 },
4275 .plid = BLKIO_POLICY_PROP,
4276};
4277#else
4278static struct blkio_policy_type blkio_policy_cfq;
4279#endif
4280
4281static int __init cfq_init(void)
4282{
4283
4284
4285
4286 if (!cfq_slice_async)
4287 cfq_slice_async = 1;
4288 if (!cfq_slice_idle)
4289 cfq_slice_idle = 1;
4290
4291#ifdef CONFIG_CFQ_GROUP_IOSCHED
4292 if (!cfq_group_idle)
4293 cfq_group_idle = 1;
4294#else
4295 cfq_group_idle = 0;
4296#endif
4297 if (cfq_slab_setup())
4298 return -ENOMEM;
4299
4300 elv_register(&iosched_cfq);
4301 blkio_policy_register(&blkio_policy_cfq);
4302
4303 return 0;
4304}
4305
4306static void __exit cfq_exit(void)
4307{
4308 DECLARE_COMPLETION_ONSTACK(all_gone);
4309 blkio_policy_unregister(&blkio_policy_cfq);
4310 elv_unregister(&iosched_cfq);
4311 ioc_gone = &all_gone;
4312
4313 smp_wmb();
4314
4315
4316
4317
4318
4319 if (elv_ioc_count_read(cfq_ioc_count))
4320 wait_for_completion(&all_gone);
4321 ida_destroy(&cic_index_ida);
4322 cfq_slab_kill();
4323}
4324
4325module_init(cfq_init);
4326module_exit(cfq_exit);
4327
4328MODULE_AUTHOR("Jens Axboe");
4329MODULE_LICENSE("GPL");
4330MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
4331