1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/backing-dev.h>
16#include <linux/bio.h>
17#include <linux/blkdev.h>
18#include <linux/highmem.h>
19#include <linux/mm.h>
20#include <linux/kernel_stat.h>
21#include <linux/string.h>
22#include <linux/init.h>
23#include <linux/bootmem.h>
24#include <linux/completion.h>
25#include <linux/slab.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/task_io_accounting_ops.h>
29#include <linux/interrupt.h>
30#include <linux/cpu.h>
31#include <linux/blktrace_api.h>
32#include <linux/fault-inject.h>
33#include <linux/scatterlist.h>
34
35
36
37
38#include <scsi/scsi_cmnd.h>
39
/*
 * Forward declarations of file-local helpers.  blk_unplug_work() and
 * blk_unplug_timeout() are installed as callbacks by
 * blk_queue_make_request() before their definitions appear; the remaining
 * helpers are likewise referenced ahead of their definitions (some of
 * which lie outside the portion of the file shown here).
 */
static void blk_unplug_work(struct work_struct *work);
static void blk_unplug_timeout(unsigned long data);
static void drive_stat_acct(struct request *rq, int new_io);
static void init_request_from_bio(struct request *req, struct bio *bio);
static int __make_request(struct request_queue *q, struct bio *bio);
static struct io_context *current_io_context(gfp_t gfp_flags, int node);
static void blk_recalc_rq_segments(struct request *rq);
static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
			    struct bio *bio);
49
50
51
52
/*
 * Slab cache handle; by its name presumably used for struct request
 * objects (NOTE(review): allocation site is not visible here - confirm).
 */
static struct kmem_cache *request_cachep;

/*
 * Slab cache handle; by its name presumably used for struct request_queue
 * objects (NOTE(review): confirm against the allocation site).
 */
static struct kmem_cache *requestq_cachep;

/*
 * Slab cache handle; by its name presumably used for struct io_context
 * objects (NOTE(review): confirm against the allocation site).
 */
static struct kmem_cache *iocontext_cachep;

/*
 * Workqueue pointer for deferred block-layer work; blk_unplug_timeout()
 * hands q->unplug_work to kblockd_schedule_work(), which presumably
 * queues onto this workqueue (TODO confirm).
 */
static struct workqueue_struct *kblockd_workqueue;

/* Exported page-frame-number limits consulted by blk_queue_bounce_limit(). */
unsigned long blk_max_low_pfn, blk_max_pfn;

EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);

/* Per-CPU list head; by its name presumably a list of completed requests. */
static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
76
77
/* HZ/50 jiffies, i.e. one fiftieth of a second (20 ms); presumably the batching time window. */
#define BLK_BATCH_TIME (HZ/50UL)

/* Default value stored in q->nr_batching by blk_queue_make_request(). */
#define BLK_BATCH_REQ 32
82
83
84
85
86
87
88static inline int queue_congestion_on_threshold(struct request_queue *q)
89{
90 return q->nr_congestion_on;
91}
92
93
94
95
96static inline int queue_congestion_off_threshold(struct request_queue *q)
97{
98 return q->nr_congestion_off;
99}
100
101static void blk_queue_congestion_threshold(struct request_queue *q)
102{
103 int nr;
104
105 nr = q->nr_requests - (q->nr_requests / 8) + 1;
106 if (nr > q->nr_requests)
107 nr = q->nr_requests;
108 q->nr_congestion_on = nr;
109
110 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
111 if (nr < 1)
112 nr = 1;
113 q->nr_congestion_off = nr;
114}
115
116
117
118
119
120
121
122
123
124
125struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
126{
127 struct backing_dev_info *ret = NULL;
128 struct request_queue *q = bdev_get_queue(bdev);
129
130 if (q)
131 ret = &q->backing_dev_info;
132 return ret;
133}
134EXPORT_SYMBOL(blk_get_backing_dev_info);
135
136
137
138
139
140
141
142
143
144
145
146
147void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
148{
149 q->prep_rq_fn = pfn;
150}
151
152EXPORT_SYMBOL(blk_queue_prep_rq);
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
171{
172 q->merge_bvec_fn = mbfn;
173}
174
175EXPORT_SYMBOL(blk_queue_merge_bvec);
176
177void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
178{
179 q->softirq_done_fn = fn;
180}
181
182EXPORT_SYMBOL(blk_queue_softirq_done);
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn)
207{
208
209
210
211 q->nr_requests = BLKDEV_MAX_RQ;
212 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
213 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
214 q->make_request_fn = mfn;
215 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
216 q->backing_dev_info.state = 0;
217 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
218 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
219 blk_queue_hardsect_size(q, 512);
220 blk_queue_dma_alignment(q, 511);
221 blk_queue_congestion_threshold(q);
222 q->nr_batching = BLK_BATCH_REQ;
223
224 q->unplug_thresh = 4;
225 q->unplug_delay = (3 * HZ) / 1000;
226 if (q->unplug_delay == 0)
227 q->unplug_delay = 1;
228
229 INIT_WORK(&q->unplug_work, blk_unplug_work);
230
231 q->unplug_timer.function = blk_unplug_timeout;
232 q->unplug_timer.data = (unsigned long)q;
233
234
235
236
237 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
238}
239
240EXPORT_SYMBOL(blk_queue_make_request);
241
242static void rq_init(struct request_queue *q, struct request *rq)
243{
244 INIT_LIST_HEAD(&rq->queuelist);
245 INIT_LIST_HEAD(&rq->donelist);
246
247 rq->errors = 0;
248 rq->bio = rq->biotail = NULL;
249 INIT_HLIST_NODE(&rq->hash);
250 RB_CLEAR_NODE(&rq->rb_node);
251 rq->ioprio = 0;
252 rq->buffer = NULL;
253 rq->ref_count = 1;
254 rq->q = q;
255 rq->special = NULL;
256 rq->data_len = 0;
257 rq->data = NULL;
258 rq->nr_phys_segments = 0;
259 rq->sense = NULL;
260 rq->end_io = NULL;
261 rq->end_io_data = NULL;
262 rq->completion_data = NULL;
263 rq->next_rq = NULL;
264}
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279int blk_queue_ordered(struct request_queue *q, unsigned ordered,
280 prepare_flush_fn *prepare_flush_fn)
281{
282 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
283 prepare_flush_fn == NULL) {
284 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
285 return -EINVAL;
286 }
287
288 if (ordered != QUEUE_ORDERED_NONE &&
289 ordered != QUEUE_ORDERED_DRAIN &&
290 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
291 ordered != QUEUE_ORDERED_DRAIN_FUA &&
292 ordered != QUEUE_ORDERED_TAG &&
293 ordered != QUEUE_ORDERED_TAG_FLUSH &&
294 ordered != QUEUE_ORDERED_TAG_FUA) {
295 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
296 return -EINVAL;
297 }
298
299 q->ordered = ordered;
300 q->next_ordered = ordered;
301 q->prepare_flush_fn = prepare_flush_fn;
302
303 return 0;
304}
305
306EXPORT_SYMBOL(blk_queue_ordered);
307
308
309
310
311inline unsigned blk_ordered_cur_seq(struct request_queue *q)
312{
313 if (!q->ordseq)
314 return 0;
315 return 1 << ffz(q->ordseq);
316}
317
318unsigned blk_ordered_req_seq(struct request *rq)
319{
320 struct request_queue *q = rq->q;
321
322 BUG_ON(q->ordseq == 0);
323
324 if (rq == &q->pre_flush_rq)
325 return QUEUE_ORDSEQ_PREFLUSH;
326 if (rq == &q->bar_rq)
327 return QUEUE_ORDSEQ_BAR;
328 if (rq == &q->post_flush_rq)
329 return QUEUE_ORDSEQ_POSTFLUSH;
330
331
332
333
334
335
336
337 if (!blk_fs_request(rq))
338 return QUEUE_ORDSEQ_DRAIN;
339
340 if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
341 (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
342 return QUEUE_ORDSEQ_DRAIN;
343 else
344 return QUEUE_ORDSEQ_DONE;
345}
346
347void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
348{
349 struct request *rq;
350 int uptodate;
351
352 if (error && !q->orderr)
353 q->orderr = error;
354
355 BUG_ON(q->ordseq & seq);
356 q->ordseq |= seq;
357
358 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
359 return;
360
361
362
363
364 uptodate = 1;
365 if (q->orderr)
366 uptodate = q->orderr;
367
368 q->ordseq = 0;
369 rq = q->orig_bar_rq;
370
371 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
372 end_that_request_last(rq, uptodate);
373}
374
375static void pre_flush_end_io(struct request *rq, int error)
376{
377 elv_completed_request(rq->q, rq);
378 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
379}
380
381static void bar_end_io(struct request *rq, int error)
382{
383 elv_completed_request(rq->q, rq);
384 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
385}
386
387static void post_flush_end_io(struct request *rq, int error)
388{
389 elv_completed_request(rq->q, rq);
390 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
391}
392
393static void queue_flush(struct request_queue *q, unsigned which)
394{
395 struct request *rq;
396 rq_end_io_fn *end_io;
397
398 if (which == QUEUE_ORDERED_PREFLUSH) {
399 rq = &q->pre_flush_rq;
400 end_io = pre_flush_end_io;
401 } else {
402 rq = &q->post_flush_rq;
403 end_io = post_flush_end_io;
404 }
405
406 rq->cmd_flags = REQ_HARDBARRIER;
407 rq_init(q, rq);
408 rq->elevator_private = NULL;
409 rq->elevator_private2 = NULL;
410 rq->rq_disk = q->bar_rq.rq_disk;
411 rq->end_io = end_io;
412 q->prepare_flush_fn(q, rq);
413
414 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
415}
416
/*
 * Begin an ordered (barrier) sequence for the barrier request @rq.
 *
 * The original request is dequeued and remembered in q->orig_bar_rq; the
 * queue's embedded proxy request q->bar_rq is set up from the original's
 * bio and takes its place.  The stages that the current ordered mode does
 * not need are marked complete in q->ordseq right away.
 *
 * Returns the request the caller should issue next (the pre-flush proxy
 * when one was queued, otherwise the barrier proxy), or NULL when the
 * queue must first drain its in-flight requests.
 */
static inline struct request *start_ordered(struct request_queue *q,
					    struct request *rq)
{
	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * Prepare the proxy barrier request.
	 */
	blkdev_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = &q->bar_rq;
	rq->cmd_flags = 0;
	rq_init(q, rq);
	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
		rq->cmd_flags |= REQ_RW;
	if (q->ordered & QUEUE_ORDERED_FUA)
		rq->cmd_flags |= REQ_FUA;
	rq->elevator_private = NULL;
	rq->elevator_private2 = NULL;
	init_request_from_bio(rq, q->orig_bar_rq->bio);
	rq->end_io = bar_end_io;

	/*
	 * Every element is inserted with ELEVATOR_INSERT_FRONT, so the
	 * sequence is queued in reverse: post-flush first, then the barrier,
	 * then the pre-flush.  The post-flush is skipped (and its stage
	 * marked done) when the mode does not ask for it or the barrier is
	 * empty according to blk_empty_barrier().
	 */
	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
	else
		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);

	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;

	/*
	 * Tag-ordered queues, and queues with nothing in flight, need no
	 * draining; otherwise return NULL so nothing is issued yet.
	 */
	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
	else
		rq = NULL;

	return rq;
}
470
/*
 * Apply the ordered-sequence rules to the request in *@rqp.
 *
 * On return *@rqp is the request to issue now, or NULL when nothing may
 * be issued yet.  Returns 0 only when a barrier request was failed with
 * -EOPNOTSUPP because the queue no longer supports ordering; returns 1 in
 * every other case.
 */
int blk_do_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	/* No sequence in progress. */
	if (!q->ordseq) {
		if (!is_barrier)
			return 1;

		if (q->next_ordered != QUEUE_ORDERED_NONE) {
			*rqp = start_ordered(q, rq);
			return 1;
		} else {
			/*
			 * A barrier arrived on a queue whose next_ordered
			 * mode is QUEUE_ORDERED_NONE: fail the request.
			 */
			blkdev_dequeue_request(rq);
			end_that_request_first(rq, -EOPNOTSUPP,
					       rq->hard_nr_sectors);
			end_that_request_last(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return 0;
		}
	}

	/*
	 * A sequence is in progress.  Non-fs requests other than the two
	 * flush proxies pass through without ordering checks.
	 */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return 1;

	if (q->ordered & QUEUE_ORDERED_TAG) {
		/* Tag ordering: only hold back a further barrier request. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Drain ordering: hold back requests of a later stage. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return 1;
}
519
520static void req_bio_endio(struct request *rq, struct bio *bio,
521 unsigned int nbytes, int error)
522{
523 struct request_queue *q = rq->q;
524
525 if (&q->bar_rq != rq) {
526 if (error)
527 clear_bit(BIO_UPTODATE, &bio->bi_flags);
528 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
529 error = -EIO;
530
531 if (unlikely(nbytes > bio->bi_size)) {
532 printk("%s: want %u bytes done, only %u left\n",
533 __FUNCTION__, nbytes, bio->bi_size);
534 nbytes = bio->bi_size;
535 }
536
537 bio->bi_size -= nbytes;
538 bio->bi_sector += (nbytes >> 9);
539 if (bio->bi_size == 0)
540 bio_endio(bio, error);
541 } else {
542
543
544
545
546
547 if (error && !q->orderr)
548 q->orderr = error;
549 }
550}
551
552
553
554
555
556
557
558
559
560
561
562
/**
 * blk_queue_bounce_limit - set the bounce-buffer limit of a queue
 * @q:        queue to configure
 * @dma_addr: bus address limit; converted to a page frame number
 *
 * Sets q->bounce_gfp and q->bounce_pfn.  When the limit falls below the
 * threshold tested for the build's word size, the emergency ISA pool is
 * initialised and bounce allocations additionally use GFP_DMA.
 */
void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
{
	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
	int dma = 0;

	q->bounce_gfp = GFP_NOIO;
#if BITS_PER_LONG == 64
	/*
	 * 64-bit: only limits below min(4GB - 1, BLK_BOUNCE_HIGH) select the
	 * DMA pool; otherwise the bounce pfn is set to max_low_pfn.
	 */
	if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
		dma = 1;
	q->bounce_pfn = max_low_pfn;
#else
	/* 32-bit: limits below blk_max_low_pfn select the DMA pool. */
	if (bounce_pfn < blk_max_low_pfn)
		dma = 1;
	q->bounce_pfn = bounce_pfn;
#endif
	if (dma) {
		init_emergency_isa_pool();
		q->bounce_gfp = GFP_NOIO | GFP_DMA;
		q->bounce_pfn = bounce_pfn;
	}
}

EXPORT_SYMBOL(blk_queue_bounce_limit);
589
590
591
592
593
594
595
596
597
598
599void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
600{
601 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
602 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
603 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
604 }
605
606 if (BLK_DEF_MAX_SECTORS > max_sectors)
607 q->max_hw_sectors = q->max_sectors = max_sectors;
608 else {
609 q->max_sectors = BLK_DEF_MAX_SECTORS;
610 q->max_hw_sectors = max_sectors;
611 }
612}
613
614EXPORT_SYMBOL(blk_queue_max_sectors);
615
616
617
618
619
620
621
622
623
624
625
626void blk_queue_max_phys_segments(struct request_queue *q,
627 unsigned short max_segments)
628{
629 if (!max_segments) {
630 max_segments = 1;
631 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
632 }
633
634 q->max_phys_segments = max_segments;
635}
636
637EXPORT_SYMBOL(blk_queue_max_phys_segments);
638
639
640
641
642
643
644
645
646
647
648
649
650void blk_queue_max_hw_segments(struct request_queue *q,
651 unsigned short max_segments)
652{
653 if (!max_segments) {
654 max_segments = 1;
655 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
656 }
657
658 q->max_hw_segments = max_segments;
659}
660
661EXPORT_SYMBOL(blk_queue_max_hw_segments);
662
663
664
665
666
667
668
669
670
671
672void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
673{
674 if (max_size < PAGE_CACHE_SIZE) {
675 max_size = PAGE_CACHE_SIZE;
676 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
677 }
678
679 q->max_segment_size = max_size;
680}
681
682EXPORT_SYMBOL(blk_queue_max_segment_size);
683
684
685
686
687
688
689
690
691
692
693
694
695void blk_queue_hardsect_size(struct request_queue *q, unsigned short size)
696{
697 q->hardsect_size = size;
698}
699
700EXPORT_SYMBOL(blk_queue_hardsect_size);
701
702
703
704
/*
 * Smaller of @l and @r, treating 0 as "no limit": if one side is 0 the
 * other is returned.  Arguments and the whole expansion are parenthesized
 * so the macro is safe inside larger expressions.  Each argument may still
 * be evaluated more than once, so avoid side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
706
707
708
709
710
711
712void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
713{
714
715 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
716 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
717
718 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
719 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
720 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
721 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
722 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
723 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
724}
725
726EXPORT_SYMBOL(blk_queue_stack_limits);
727
728
729
730
731
732
733void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
734{
735 if (mask < PAGE_CACHE_SIZE - 1) {
736 mask = PAGE_CACHE_SIZE - 1;
737 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
738 }
739
740 q->seg_boundary_mask = mask;
741}
742
743EXPORT_SYMBOL(blk_queue_segment_boundary);
744
745
746
747
748
749
750
751
752
753
754
755void blk_queue_dma_alignment(struct request_queue *q, int mask)
756{
757 q->dma_alignment = mask;
758}
759
760EXPORT_SYMBOL(blk_queue_dma_alignment);
761
762
763
764
765
766
767
768
769
770
771
772
773struct request *blk_queue_find_tag(struct request_queue *q, int tag)
774{
775 return blk_map_queue_find_tag(q->queue_tags, tag);
776}
777
778EXPORT_SYMBOL(blk_queue_find_tag);
779
780
781
782
783
784
785
786
787static int __blk_free_tags(struct blk_queue_tag *bqt)
788{
789 int retval;
790
791 retval = atomic_dec_and_test(&bqt->refcnt);
792 if (retval) {
793 BUG_ON(bqt->busy);
794
795 kfree(bqt->tag_index);
796 bqt->tag_index = NULL;
797
798 kfree(bqt->tag_map);
799 bqt->tag_map = NULL;
800
801 kfree(bqt);
802
803 }
804
805 return retval;
806}
807
808
809
810
811
812
813
814
815
816static void __blk_queue_free_tags(struct request_queue *q)
817{
818 struct blk_queue_tag *bqt = q->queue_tags;
819
820 if (!bqt)
821 return;
822
823 __blk_free_tags(bqt);
824
825 q->queue_tags = NULL;
826 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
827}
828
829
830
831
832
833
834
835
836
837
/**
 * blk_free_tags - release a tag map
 * @bqt: tag map to release
 *
 * Drops one reference; it is a BUG if that was not the last reference,
 * i.e. if the map was not actually freed.
 */
void blk_free_tags(struct blk_queue_tag *bqt)
{
	int freed = __blk_free_tags(bqt);

	if (unlikely(!freed))
		BUG();
}
EXPORT_SYMBOL(blk_free_tags);
844
845
846
847
848
849
850
851
852
853void blk_queue_free_tags(struct request_queue *q)
854{
855 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
856}
857
858EXPORT_SYMBOL(blk_queue_free_tags);
859
860static int
861init_tag_map(struct request_queue *q, struct blk_queue_tag *tags, int depth)
862{
863 struct request **tag_index;
864 unsigned long *tag_map;
865 int nr_ulongs;
866
867 if (q && depth > q->nr_requests * 2) {
868 depth = q->nr_requests * 2;
869 printk(KERN_ERR "%s: adjusted depth to %d\n",
870 __FUNCTION__, depth);
871 }
872
873 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
874 if (!tag_index)
875 goto fail;
876
877 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
878 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
879 if (!tag_map)
880 goto fail;
881
882 tags->real_max_depth = depth;
883 tags->max_depth = depth;
884 tags->tag_index = tag_index;
885 tags->tag_map = tag_map;
886
887 return 0;
888fail:
889 kfree(tag_index);
890 return -ENOMEM;
891}
892
893static struct blk_queue_tag *__blk_queue_init_tags(struct request_queue *q,
894 int depth)
895{
896 struct blk_queue_tag *tags;
897
898 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
899 if (!tags)
900 goto fail;
901
902 if (init_tag_map(q, tags, depth))
903 goto fail;
904
905 tags->busy = 0;
906 atomic_set(&tags->refcnt, 1);
907 return tags;
908fail:
909 kfree(tags);
910 return NULL;
911}
912
913
914
915
916
917
918struct blk_queue_tag *blk_init_tags(int depth)
919{
920 return __blk_queue_init_tags(NULL, depth);
921}
922EXPORT_SYMBOL(blk_init_tags);
923
924
925
926
927
928
929
930int blk_queue_init_tags(struct request_queue *q, int depth,
931 struct blk_queue_tag *tags)
932{
933 int rc;
934
935 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
936
937 if (!tags && !q->queue_tags) {
938 tags = __blk_queue_init_tags(q, depth);
939
940 if (!tags)
941 goto fail;
942 } else if (q->queue_tags) {
943 if ((rc = blk_queue_resize_tags(q, depth)))
944 return rc;
945 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
946 return 0;
947 } else
948 atomic_inc(&tags->refcnt);
949
950
951
952
953 q->queue_tags = tags;
954 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
955 INIT_LIST_HEAD(&q->tag_busy_list);
956 return 0;
957fail:
958 kfree(tags);
959 return -ENOMEM;
960}
961
962EXPORT_SYMBOL(blk_queue_init_tags);
963
964
965
966
967
968
969
970
971
/**
 * blk_queue_resize_tags - change the tag depth of a queue
 * @q:         queue whose tag map is resized
 * @new_depth: requested number of tags
 *
 * Returns 0 on success, -ENXIO when @q has no tag map, -EBUSY when the
 * map would have to grow but is shared, or -ENOMEM on allocation failure.
 */
int blk_queue_resize_tags(struct request_queue *q, int new_depth)
{
	struct blk_queue_tag *bqt = q->queue_tags;
	struct request **tag_index;
	unsigned long *tag_map;
	int max_depth, nr_ulongs;

	if (!bqt)
		return -ENXIO;

	/*
	 * The allocated arrays are already large enough: only the usable
	 * depth changes, the arrays themselves are kept as they are.
	 */
	if (new_depth <= bqt->real_max_depth) {
		bqt->max_depth = new_depth;
		return 0;
	}

	/*
	 * Growing replaces the arrays, which is refused unless this queue
	 * holds the only reference to the map.
	 */
	if (atomic_read(&bqt->refcnt) != 1)
		return -EBUSY;

	/*
	 * Remember the old arrays and depth so their contents can be copied
	 * into the new ones.
	 */
	tag_index = bqt->tag_index;
	tag_map = bqt->tag_map;
	max_depth = bqt->real_max_depth;

	if (init_tag_map(q, bqt, new_depth))
		return -ENOMEM;

	/*
	 * NOTE(review): init_tag_map() may cap the depth at twice
	 * q->nr_requests; the copies below assume the new arrays hold at
	 * least max_depth entries - confirm the cap can never drop below
	 * the old real_max_depth.
	 */
	memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
	nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
	memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));

	kfree(tag_index);
	kfree(tag_map);
	return 0;
}

EXPORT_SYMBOL(blk_queue_resize_tags);
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035void blk_queue_end_tag(struct request_queue *q, struct request *rq)
1036{
1037 struct blk_queue_tag *bqt = q->queue_tags;
1038 int tag = rq->tag;
1039
1040 BUG_ON(tag == -1);
1041
1042 if (unlikely(tag >= bqt->real_max_depth))
1043
1044
1045
1046
1047 return;
1048
1049 list_del_init(&rq->queuelist);
1050 rq->cmd_flags &= ~REQ_QUEUED;
1051 rq->tag = -1;
1052
1053 if (unlikely(bqt->tag_index[tag] == NULL))
1054 printk(KERN_ERR "%s: tag %d is missing\n",
1055 __FUNCTION__, tag);
1056
1057 bqt->tag_index[tag] = NULL;
1058
1059 if (unlikely(!test_bit(tag, bqt->tag_map))) {
1060 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
1061 __FUNCTION__, tag);
1062 return;
1063 }
1064
1065
1066
1067
1068 clear_bit_unlock(tag, bqt->tag_map);
1069 bqt->busy--;
1070}
1071
1072EXPORT_SYMBOL(blk_queue_end_tag);
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092int blk_queue_start_tag(struct request_queue *q, struct request *rq)
1093{
1094 struct blk_queue_tag *bqt = q->queue_tags;
1095 int tag;
1096
1097 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
1098 printk(KERN_ERR
1099 "%s: request %p for device [%s] already tagged %d",
1100 __FUNCTION__, rq,
1101 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
1102 BUG();
1103 }
1104
1105
1106
1107
1108
1109 do {
1110 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1111 if (tag >= bqt->max_depth)
1112 return 1;
1113
1114 } while (test_and_set_bit_lock(tag, bqt->tag_map));
1115
1116
1117
1118
1119
1120 rq->cmd_flags |= REQ_QUEUED;
1121 rq->tag = tag;
1122 bqt->tag_index[tag] = rq;
1123 blkdev_dequeue_request(rq);
1124 list_add(&rq->queuelist, &q->tag_busy_list);
1125 bqt->busy++;
1126 return 0;
1127}
1128
1129EXPORT_SYMBOL(blk_queue_start_tag);
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143void blk_queue_invalidate_tags(struct request_queue *q)
1144{
1145 struct list_head *tmp, *n;
1146
1147 list_for_each_safe(tmp, n, &q->tag_busy_list)
1148 blk_requeue_request(q, list_entry_rq(tmp));
1149}
1150
1151EXPORT_SYMBOL(blk_queue_invalidate_tags);
1152
1153void blk_dump_rq_flags(struct request *rq, char *msg)
1154{
1155 int bit;
1156
1157 printk("%s: dev %s: type=%x, flags=%x\n", msg,
1158 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
1159 rq->cmd_flags);
1160
1161 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1162 rq->nr_sectors,
1163 rq->current_nr_sectors);
1164 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1165
1166 if (blk_pc_request(rq)) {
1167 printk("cdb: ");
1168 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1169 printk("%02x ", rq->cmd[bit]);
1170 printk("\n");
1171 }
1172}
1173
1174EXPORT_SYMBOL(blk_dump_rq_flags);
1175
1176void blk_recount_segments(struct request_queue *q, struct bio *bio)
1177{
1178 struct request rq;
1179 struct bio *nxt = bio->bi_next;
1180 rq.q = q;
1181 rq.bio = rq.biotail = bio;
1182 bio->bi_next = NULL;
1183 blk_recalc_rq_segments(&rq);
1184 bio->bi_next = nxt;
1185 bio->bi_phys_segments = rq.nr_phys_segments;
1186 bio->bi_hw_segments = rq.nr_hw_segments;
1187 bio->bi_flags |= (1 << BIO_SEG_VALID);
1188}
1189EXPORT_SYMBOL(blk_recount_segments);
1190
/*
 * Recompute rq->nr_phys_segments and rq->nr_hw_segments by walking every
 * bio_vec of the request, and grow the hardware front size of the first
 * bio and the hardware back size of the last bio where the walk found
 * larger values.  Does nothing when the request has no bio.
 */
static void blk_recalc_rq_segments(struct request *rq)
{
	int nr_phys_segs;
	int nr_hw_segs;
	unsigned int phys_size;
	unsigned int hw_size;
	struct bio_vec *bv, *bvprv = NULL;
	int seg_size;
	int hw_seg_size;
	int cluster;
	struct req_iterator iter;
	/* highprv starts at 1 so the first bio_vec always opens a new segment. */
	int high, highprv = 1;
	struct request_queue *q = rq->q;

	if (!rq->bio)
		return;

	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
	hw_seg_size = seg_size = 0;
	/* phys_size and hw_size are only zeroed here and not used further. */
	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
	rq_for_each_segment(bv, rq, iter) {
		/*
		 * A page above the queue's bounce pfn, or one following such
		 * a page, is never merged with its neighbour: it always
		 * starts a new physical and hardware segment.
		 */
		high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
		if (high || highprv)
			goto new_hw_segment;
		if (cluster) {
			/* Try to extend the current physical segment. */
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
				goto new_hw_segment;

			seg_size += bv->bv_len;
			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
		/* New physical segment; it may still extend the hw segment. */
		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
			hw_seg_size += bv->bv_len;
		else {
new_hw_segment:
			/* Closing the first hw segment: record its size. */
			if (nr_hw_segs == 1 &&
			    hw_seg_size > rq->bio->bi_hw_front_size)
				rq->bio->bi_hw_front_size = hw_seg_size;
			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
			nr_hw_segs++;
		}

		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}

	/* The last hw segment is the first one too when there is only one. */
	if (nr_hw_segs == 1 &&
	    hw_seg_size > rq->bio->bi_hw_front_size)
		rq->bio->bi_hw_front_size = hw_seg_size;
	if (hw_seg_size > rq->biotail->bi_hw_back_size)
		rq->biotail->bi_hw_back_size = hw_seg_size;
	rq->nr_phys_segments = nr_phys_segs;
	rq->nr_hw_segments = nr_hw_segs;
}
1262
1263static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
1264 struct bio *nxt)
1265{
1266 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1267 return 0;
1268
1269 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1270 return 0;
1271 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1272 return 0;
1273
1274
1275
1276
1277
1278 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1279 return 1;
1280
1281 return 0;
1282}
1283
1284static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
1285 struct bio *nxt)
1286{
1287 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1288 blk_recount_segments(q, bio);
1289 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1290 blk_recount_segments(q, nxt);
1291 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1292 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
1293 return 0;
1294 if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
1295 return 0;
1296
1297 return 1;
1298}
1299
1300
1301
1302
1303
/**
 * blk_rq_map_sg - map a request onto a scatterlist
 * @q:      queue the request belongs to
 * @rq:     request to map
 * @sglist: scatterlist to fill; the code does not check its length, so
 *          the caller must supply enough entries
 *
 * Returns the number of scatterlist entries used.
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct bio_vec *bvec, *bvprv;
	struct req_iterator iter;
	struct scatterlist *sg;
	int nsegs, cluster;

	nsegs = 0;
	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);

	/*
	 * Walk every bio_vec of the request.
	 */
	bvprv = NULL;
	sg = NULL;
	rq_for_each_segment(bvec, rq, iter) {
		int nbytes = bvec->bv_len;

		if (bvprv && cluster) {
			/* Try to append to the current entry. */
			if (sg->length + nbytes > q->max_segment_size)
				goto new_segment;

			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
				goto new_segment;

			sg->length += nbytes;
		} else {
new_segment:
			if (!sg)
				sg = sglist;
			else {
				/*
				 * Moving on to another entry: clear bit 0x02
				 * of page_link on the current one first.
				 * NOTE(review): presumably this is the
				 * end-of-list marker set by sg_mark_end(),
				 * left over from an earlier mapping -
				 * confirm against scatterlist.h.
				 */
				sg->page_link &= ~0x02;
				sg = sg_next(sg);
			}

			sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
			nsegs++;
		}
		bvprv = bvec;
	}

	/* Mark the last entry that was used, if any. */
	if (sg)
		sg_mark_end(sg);

	return nsegs;
}

EXPORT_SYMBOL(blk_rq_map_sg);
1365
1366
1367
1368
1369
1370
1371static inline int ll_new_mergeable(struct request_queue *q,
1372 struct request *req,
1373 struct bio *bio)
1374{
1375 int nr_phys_segs = bio_phys_segments(q, bio);
1376
1377 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1378 req->cmd_flags |= REQ_NOMERGE;
1379 if (req == q->last_merge)
1380 q->last_merge = NULL;
1381 return 0;
1382 }
1383
1384
1385
1386
1387
1388 req->nr_phys_segments += nr_phys_segs;
1389 return 1;
1390}
1391
1392static inline int ll_new_hw_segment(struct request_queue *q,
1393 struct request *req,
1394 struct bio *bio)
1395{
1396 int nr_hw_segs = bio_hw_segments(q, bio);
1397 int nr_phys_segs = bio_phys_segments(q, bio);
1398
1399 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1400 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1401 req->cmd_flags |= REQ_NOMERGE;
1402 if (req == q->last_merge)
1403 q->last_merge = NULL;
1404 return 0;
1405 }
1406
1407
1408
1409
1410
1411 req->nr_hw_segments += nr_hw_segs;
1412 req->nr_phys_segments += nr_phys_segs;
1413 return 1;
1414}
1415
1416static int ll_back_merge_fn(struct request_queue *q, struct request *req,
1417 struct bio *bio)
1418{
1419 unsigned short max_sectors;
1420 int len;
1421
1422 if (unlikely(blk_pc_request(req)))
1423 max_sectors = q->max_hw_sectors;
1424 else
1425 max_sectors = q->max_sectors;
1426
1427 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1428 req->cmd_flags |= REQ_NOMERGE;
1429 if (req == q->last_merge)
1430 q->last_merge = NULL;
1431 return 0;
1432 }
1433 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1434 blk_recount_segments(q, req->biotail);
1435 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1436 blk_recount_segments(q, bio);
1437 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1438 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1439 !BIOVEC_VIRT_OVERSIZE(len)) {
1440 int mergeable = ll_new_mergeable(q, req, bio);
1441
1442 if (mergeable) {
1443 if (req->nr_hw_segments == 1)
1444 req->bio->bi_hw_front_size = len;
1445 if (bio->bi_hw_segments == 1)
1446 bio->bi_hw_back_size = len;
1447 }
1448 return mergeable;
1449 }
1450
1451 return ll_new_hw_segment(q, req, bio);
1452}
1453
1454static int ll_front_merge_fn(struct request_queue *q, struct request *req,
1455 struct bio *bio)
1456{
1457 unsigned short max_sectors;
1458 int len;
1459
1460 if (unlikely(blk_pc_request(req)))
1461 max_sectors = q->max_hw_sectors;
1462 else
1463 max_sectors = q->max_sectors;
1464
1465
1466 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1467 req->cmd_flags |= REQ_NOMERGE;
1468 if (req == q->last_merge)
1469 q->last_merge = NULL;
1470 return 0;
1471 }
1472 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1473 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1474 blk_recount_segments(q, bio);
1475 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1476 blk_recount_segments(q, req->bio);
1477 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1478 !BIOVEC_VIRT_OVERSIZE(len)) {
1479 int mergeable = ll_new_mergeable(q, req, bio);
1480
1481 if (mergeable) {
1482 if (bio->bi_hw_segments == 1)
1483 bio->bi_hw_front_size = len;
1484 if (req->nr_hw_segments == 1)
1485 req->biotail->bi_hw_back_size = len;
1486 }
1487 return mergeable;
1488 }
1489
1490 return ll_new_hw_segment(q, req, bio);
1491}
1492
/*
 * Decide whether request @next may be merged onto the end of @req and,
 * if so, store the combined segment counts in @req.
 *
 * Returns 1 when the merge is allowed and 0 otherwise.  Note that the
 * hardware front/back sizes of the boundary bios may already have been
 * updated when the final hardware-segment check rejects the merge.
 */
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	int total_hw_segments;

	/*
	 * Requests carrying a ->special pointer are never merged.
	 */
	if (req->special || next->special)
		return 0;

	/*
	 * The combined request must not exceed the queue's sector limit.
	 */
	if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
		return 0;

	/* The two boundary segments count once if physically contiguous. */
	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio))
		total_phys_segments--;

	if (total_phys_segments > q->max_phys_segments)
		return 0;

	total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
	if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
		int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;

		/*
		 * The boundary hw segments fuse into one of size len; where
		 * a request consists of a single hw segment, that segment is
		 * also its front (or back) segment, so record the new size.
		 */
		if (req->nr_hw_segments == 1)
			req->bio->bi_hw_front_size = len;
		if (next->nr_hw_segments == 1)
			next->biotail->bi_hw_back_size = len;
		total_hw_segments--;
	}

	if (total_hw_segments > q->max_hw_segments)
		return 0;

	/* Merge accepted: commit the combined counts. */
	req->nr_phys_segments = total_phys_segments;
	req->nr_hw_segments = total_hw_segments;
	return 1;
}
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549void blk_plug_device(struct request_queue *q)
1550{
1551 WARN_ON(!irqs_disabled());
1552
1553
1554
1555
1556
1557 if (blk_queue_stopped(q))
1558 return;
1559
1560 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1561 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1562 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1563 }
1564}
1565
1566EXPORT_SYMBOL(blk_plug_device);
1567
1568
1569
1570
1571
1572int blk_remove_plug(struct request_queue *q)
1573{
1574 WARN_ON(!irqs_disabled());
1575
1576 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1577 return 0;
1578
1579 del_timer(&q->unplug_timer);
1580 return 1;
1581}
1582
1583EXPORT_SYMBOL(blk_remove_plug);
1584
1585
1586
1587
1588void __generic_unplug_device(struct request_queue *q)
1589{
1590 if (unlikely(blk_queue_stopped(q)))
1591 return;
1592
1593 if (!blk_remove_plug(q))
1594 return;
1595
1596 q->request_fn(q);
1597}
1598EXPORT_SYMBOL(__generic_unplug_device);
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611void generic_unplug_device(struct request_queue *q)
1612{
1613 spin_lock_irq(q->queue_lock);
1614 __generic_unplug_device(q);
1615 spin_unlock_irq(q->queue_lock);
1616}
1617EXPORT_SYMBOL(generic_unplug_device);
1618
1619static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1620 struct page *page)
1621{
1622 struct request_queue *q = bdi->unplug_io_data;
1623
1624 blk_unplug(q);
1625}
1626
1627static void blk_unplug_work(struct work_struct *work)
1628{
1629 struct request_queue *q =
1630 container_of(work, struct request_queue, unplug_work);
1631
1632 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1633 q->rq.count[READ] + q->rq.count[WRITE]);
1634
1635 q->unplug_fn(q);
1636}
1637
1638static void blk_unplug_timeout(unsigned long data)
1639{
1640 struct request_queue *q = (struct request_queue *)data;
1641
1642 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1643 q->rq.count[READ] + q->rq.count[WRITE]);
1644
1645 kblockd_schedule_work(&q->unplug_work);
1646}
1647
1648void blk_unplug(struct request_queue *q)
1649{
1650
1651
1652
1653 if (q->unplug_fn) {
1654 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1655 q->rq.count[READ] + q->rq.count[WRITE]);
1656
1657 q->unplug_fn(q);
1658 }
1659}
1660EXPORT_SYMBOL(blk_unplug);
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671void blk_start_queue(struct request_queue *q)
1672{
1673 WARN_ON(!irqs_disabled());
1674
1675 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1676
1677
1678
1679
1680
1681 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1682 q->request_fn(q);
1683 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1684 } else {
1685 blk_plug_device(q);
1686 kblockd_schedule_work(&q->unplug_work);
1687 }
1688}
1689
1690EXPORT_SYMBOL(blk_start_queue);
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706void blk_stop_queue(struct request_queue *q)
1707{
1708 blk_remove_plug(q);
1709 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1710}
1711EXPORT_SYMBOL(blk_stop_queue);
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727void blk_sync_queue(struct request_queue *q)
1728{
1729 del_timer_sync(&q->unplug_timer);
1730 kblockd_flush_work(&q->unplug_work);
1731}
1732EXPORT_SYMBOL(blk_sync_queue);
1733
1734
1735
1736
1737
1738void blk_run_queue(struct request_queue *q)
1739{
1740 unsigned long flags;
1741
1742 spin_lock_irqsave(q->queue_lock, flags);
1743 blk_remove_plug(q);
1744
1745
1746
1747
1748
1749 if (!elv_queue_empty(q)) {
1750 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1751 q->request_fn(q);
1752 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1753 } else {
1754 blk_plug_device(q);
1755 kblockd_schedule_work(&q->unplug_work);
1756 }
1757 }
1758
1759 spin_unlock_irqrestore(q->queue_lock, flags);
1760}
1761EXPORT_SYMBOL(blk_run_queue);
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778static void blk_release_queue(struct kobject *kobj)
1779{
1780 struct request_queue *q =
1781 container_of(kobj, struct request_queue, kobj);
1782 struct request_list *rl = &q->rq;
1783
1784 blk_sync_queue(q);
1785
1786 if (rl->rq_pool)
1787 mempool_destroy(rl->rq_pool);
1788
1789 if (q->queue_tags)
1790 __blk_queue_free_tags(q);
1791
1792 blk_trace_shutdown(q);
1793
1794 bdi_destroy(&q->backing_dev_info);
1795 kmem_cache_free(requestq_cachep, q);
1796}
1797
1798void blk_put_queue(struct request_queue *q)
1799{
1800 kobject_put(&q->kobj);
1801}
1802EXPORT_SYMBOL(blk_put_queue);
1803
1804void blk_cleanup_queue(struct request_queue * q)
1805{
1806 mutex_lock(&q->sysfs_lock);
1807 set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
1808 mutex_unlock(&q->sysfs_lock);
1809
1810 if (q->elevator)
1811 elevator_exit(q->elevator);
1812
1813 blk_put_queue(q);
1814}
1815
1816EXPORT_SYMBOL(blk_cleanup_queue);
1817
1818static int blk_init_free_list(struct request_queue *q)
1819{
1820 struct request_list *rl = &q->rq;
1821
1822 rl->count[READ] = rl->count[WRITE] = 0;
1823 rl->starved[READ] = rl->starved[WRITE] = 0;
1824 rl->elvpriv = 0;
1825 init_waitqueue_head(&rl->wait[READ]);
1826 init_waitqueue_head(&rl->wait[WRITE]);
1827
1828 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1829 mempool_free_slab, request_cachep, q->node);
1830
1831 if (!rl->rq_pool)
1832 return -ENOMEM;
1833
1834 return 0;
1835}
1836
1837struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
1838{
1839 return blk_alloc_queue_node(gfp_mask, -1);
1840}
1841EXPORT_SYMBOL(blk_alloc_queue);
1842
1843static struct kobj_type queue_ktype;
1844
1845struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1846{
1847 struct request_queue *q;
1848 int err;
1849
1850 q = kmem_cache_alloc_node(requestq_cachep,
1851 gfp_mask | __GFP_ZERO, node_id);
1852 if (!q)
1853 return NULL;
1854
1855 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1856 q->backing_dev_info.unplug_io_data = q;
1857 err = bdi_init(&q->backing_dev_info);
1858 if (err) {
1859 kmem_cache_free(requestq_cachep, q);
1860 return NULL;
1861 }
1862
1863 init_timer(&q->unplug_timer);
1864
1865 kobject_set_name(&q->kobj, "%s", "queue");
1866 q->kobj.ktype = &queue_ktype;
1867 kobject_init(&q->kobj);
1868
1869 mutex_init(&q->sysfs_lock);
1870
1871 return q;
1872}
1873EXPORT_SYMBOL(blk_alloc_queue_node);
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1909{
1910 return blk_init_queue_node(rfn, lock, -1);
1911}
1912EXPORT_SYMBOL(blk_init_queue);
1913
1914struct request_queue *
1915blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1916{
1917 struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
1918
1919 if (!q)
1920 return NULL;
1921
1922 q->node = node_id;
1923 if (blk_init_free_list(q)) {
1924 kmem_cache_free(requestq_cachep, q);
1925 return NULL;
1926 }
1927
1928
1929
1930
1931
1932 if (!lock) {
1933 spin_lock_init(&q->__queue_lock);
1934 lock = &q->__queue_lock;
1935 }
1936
1937 q->request_fn = rfn;
1938 q->prep_rq_fn = NULL;
1939 q->unplug_fn = generic_unplug_device;
1940 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1941 q->queue_lock = lock;
1942
1943 blk_queue_segment_boundary(q, 0xffffffff);
1944
1945 blk_queue_make_request(q, __make_request);
1946 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1947
1948 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1949 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1950
1951 q->sg_reserved_size = INT_MAX;
1952
1953
1954
1955
1956 if (!elevator_init(q, NULL)) {
1957 blk_queue_congestion_threshold(q);
1958 return q;
1959 }
1960
1961 blk_put_queue(q);
1962 return NULL;
1963}
1964EXPORT_SYMBOL(blk_init_queue_node);
1965
1966int blk_get_queue(struct request_queue *q)
1967{
1968 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
1969 kobject_get(&q->kobj);
1970 return 0;
1971 }
1972
1973 return 1;
1974}
1975
1976EXPORT_SYMBOL(blk_get_queue);
1977
1978static inline void blk_free_request(struct request_queue *q, struct request *rq)
1979{
1980 if (rq->cmd_flags & REQ_ELVPRIV)
1981 elv_put_request(q, rq);
1982 mempool_free(rq, q->rq.rq_pool);
1983}
1984
1985static struct request *
1986blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
1987{
1988 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1989
1990 if (!rq)
1991 return NULL;
1992
1993
1994
1995
1996
1997 rq->cmd_flags = rw | REQ_ALLOCED;
1998
1999 if (priv) {
2000 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
2001 mempool_free(rq, q->rq.rq_pool);
2002 return NULL;
2003 }
2004 rq->cmd_flags |= REQ_ELVPRIV;
2005 }
2006
2007 return rq;
2008}
2009
2010
2011
2012
2013
2014static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
2015{
2016 if (!ioc)
2017 return 0;
2018
2019
2020
2021
2022
2023
2024 return ioc->nr_batch_requests == q->nr_batching ||
2025 (ioc->nr_batch_requests > 0
2026 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
2027}
2028
2029
2030
2031
2032
2033
2034
2035static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
2036{
2037 if (!ioc || ioc_batching(q, ioc))
2038 return;
2039
2040 ioc->nr_batch_requests = q->nr_batching;
2041 ioc->last_waited = jiffies;
2042}
2043
2044static void __freed_request(struct request_queue *q, int rw)
2045{
2046 struct request_list *rl = &q->rq;
2047
2048 if (rl->count[rw] < queue_congestion_off_threshold(q))
2049 blk_clear_queue_congested(q, rw);
2050
2051 if (rl->count[rw] + 1 <= q->nr_requests) {
2052 if (waitqueue_active(&rl->wait[rw]))
2053 wake_up(&rl->wait[rw]);
2054
2055 blk_clear_queue_full(q, rw);
2056 }
2057}
2058
2059
2060
2061
2062
2063static void freed_request(struct request_queue *q, int rw, int priv)
2064{
2065 struct request_list *rl = &q->rq;
2066
2067 rl->count[rw]--;
2068 if (priv)
2069 rl->elvpriv--;
2070
2071 __freed_request(q, rw);
2072
2073 if (unlikely(rl->starved[rw ^ 1]))
2074 __freed_request(q, rw ^ 1);
2075}
2076
/* First request on @list, linked through request->queuelist. */
#define blkdev_free_rq(list) \
	list_entry((list)->next, struct request, queuelist)
2078
2079
2080
2081
2082
2083static struct request *get_request(struct request_queue *q, int rw_flags,
2084 struct bio *bio, gfp_t gfp_mask)
2085{
2086 struct request *rq = NULL;
2087 struct request_list *rl = &q->rq;
2088 struct io_context *ioc = NULL;
2089 const int rw = rw_flags & 0x01;
2090 int may_queue, priv;
2091
2092 may_queue = elv_may_queue(q, rw_flags);
2093 if (may_queue == ELV_MQUEUE_NO)
2094 goto rq_starved;
2095
2096 if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
2097 if (rl->count[rw]+1 >= q->nr_requests) {
2098 ioc = current_io_context(GFP_ATOMIC, q->node);
2099
2100
2101
2102
2103
2104
2105 if (!blk_queue_full(q, rw)) {
2106 ioc_set_batching(q, ioc);
2107 blk_set_queue_full(q, rw);
2108 } else {
2109 if (may_queue != ELV_MQUEUE_MUST
2110 && !ioc_batching(q, ioc)) {
2111
2112
2113
2114
2115
2116 goto out;
2117 }
2118 }
2119 }
2120 blk_set_queue_congested(q, rw);
2121 }
2122
2123
2124
2125
2126
2127
2128 if (rl->count[rw] >= (3 * q->nr_requests / 2))
2129 goto out;
2130
2131 rl->count[rw]++;
2132 rl->starved[rw] = 0;
2133
2134 priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
2135 if (priv)
2136 rl->elvpriv++;
2137
2138 spin_unlock_irq(q->queue_lock);
2139
2140 rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
2141 if (unlikely(!rq)) {
2142
2143
2144
2145
2146
2147
2148
2149 spin_lock_irq(q->queue_lock);
2150 freed_request(q, rw, priv);
2151
2152
2153
2154
2155
2156
2157
2158
2159rq_starved:
2160 if (unlikely(rl->count[rw] == 0))
2161 rl->starved[rw] = 1;
2162
2163 goto out;
2164 }
2165
2166
2167
2168
2169
2170
2171
2172 if (ioc_batching(q, ioc))
2173 ioc->nr_batch_requests--;
2174
2175 rq_init(q, rq);
2176
2177 blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
2178out:
2179 return rq;
2180}
2181
2182
2183
2184
2185
2186
2187
2188static struct request *get_request_wait(struct request_queue *q, int rw_flags,
2189 struct bio *bio)
2190{
2191 const int rw = rw_flags & 0x01;
2192 struct request *rq;
2193
2194 rq = get_request(q, rw_flags, bio, GFP_NOIO);
2195 while (!rq) {
2196 DEFINE_WAIT(wait);
2197 struct request_list *rl = &q->rq;
2198
2199 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
2200 TASK_UNINTERRUPTIBLE);
2201
2202 rq = get_request(q, rw_flags, bio, GFP_NOIO);
2203
2204 if (!rq) {
2205 struct io_context *ioc;
2206
2207 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
2208
2209 __generic_unplug_device(q);
2210 spin_unlock_irq(q->queue_lock);
2211 io_schedule();
2212
2213
2214
2215
2216
2217
2218
2219 ioc = current_io_context(GFP_NOIO, q->node);
2220 ioc_set_batching(q, ioc);
2221
2222 spin_lock_irq(q->queue_lock);
2223 }
2224 finish_wait(&rl->wait[rw], &wait);
2225 }
2226
2227 return rq;
2228}
2229
2230struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
2231{
2232 struct request *rq;
2233
2234 BUG_ON(rw != READ && rw != WRITE);
2235
2236 spin_lock_irq(q->queue_lock);
2237 if (gfp_mask & __GFP_WAIT) {
2238 rq = get_request_wait(q, rw, NULL);
2239 } else {
2240 rq = get_request(q, rw, NULL, gfp_mask);
2241 if (!rq)
2242 spin_unlock_irq(q->queue_lock);
2243 }
2244
2245
2246 return rq;
2247}
2248EXPORT_SYMBOL(blk_get_request);
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260void blk_start_queueing(struct request_queue *q)
2261{
2262 if (!blk_queue_plugged(q))
2263 q->request_fn(q);
2264 else
2265 __generic_unplug_device(q);
2266}
2267EXPORT_SYMBOL(blk_start_queueing);
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279void blk_requeue_request(struct request_queue *q, struct request *rq)
2280{
2281 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
2282
2283 if (blk_rq_tagged(rq))
2284 blk_queue_end_tag(q, rq);
2285
2286 elv_requeue_request(q, rq);
2287}
2288
2289EXPORT_SYMBOL(blk_requeue_request);
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310void blk_insert_request(struct request_queue *q, struct request *rq,
2311 int at_head, void *data)
2312{
2313 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2314 unsigned long flags;
2315
2316
2317
2318
2319
2320
2321 rq->cmd_type = REQ_TYPE_SPECIAL;
2322 rq->cmd_flags |= REQ_SOFTBARRIER;
2323
2324 rq->special = data;
2325
2326 spin_lock_irqsave(q->queue_lock, flags);
2327
2328
2329
2330
2331 if (blk_rq_tagged(rq))
2332 blk_queue_end_tag(q, rq);
2333
2334 drive_stat_acct(rq, 1);
2335 __elv_add_request(q, rq, where, 0);
2336 blk_start_queueing(q);
2337 spin_unlock_irqrestore(q->queue_lock, flags);
2338}
2339
2340EXPORT_SYMBOL(blk_insert_request);
2341
2342static int __blk_rq_unmap_user(struct bio *bio)
2343{
2344 int ret = 0;
2345
2346 if (bio) {
2347 if (bio_flagged(bio, BIO_USER_MAPPED))
2348 bio_unmap_user(bio);
2349 else
2350 ret = bio_uncopy_user(bio);
2351 }
2352
2353 return ret;
2354}
2355
2356int blk_rq_append_bio(struct request_queue *q, struct request *rq,
2357 struct bio *bio)
2358{
2359 if (!rq->bio)
2360 blk_rq_bio_prep(q, rq, bio);
2361 else if (!ll_back_merge_fn(q, rq, bio))
2362 return -EINVAL;
2363 else {
2364 rq->biotail->bi_next = bio;
2365 rq->biotail = bio;
2366
2367 rq->data_len += bio->bi_size;
2368 }
2369 return 0;
2370}
2371EXPORT_SYMBOL(blk_rq_append_bio);
2372
2373static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
2374 void __user *ubuf, unsigned int len)
2375{
2376 unsigned long uaddr;
2377 struct bio *bio, *orig_bio;
2378 int reading, ret;
2379
2380 reading = rq_data_dir(rq) == READ;
2381
2382
2383
2384
2385
2386 uaddr = (unsigned long) ubuf;
2387 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2388 bio = bio_map_user(q, NULL, uaddr, len, reading);
2389 else
2390 bio = bio_copy_user(q, uaddr, len, reading);
2391
2392 if (IS_ERR(bio))
2393 return PTR_ERR(bio);
2394
2395 orig_bio = bio;
2396 blk_queue_bounce(q, &bio);
2397
2398
2399
2400
2401
2402 bio_get(bio);
2403
2404 ret = blk_rq_append_bio(q, rq, bio);
2405 if (!ret)
2406 return bio->bi_size;
2407
2408
2409 bio_endio(bio, 0);
2410 __blk_rq_unmap_user(orig_bio);
2411 bio_put(bio);
2412 return ret;
2413}
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435int blk_rq_map_user(struct request_queue *q, struct request *rq,
2436 void __user *ubuf, unsigned long len)
2437{
2438 unsigned long bytes_read = 0;
2439 struct bio *bio = NULL;
2440 int ret;
2441
2442 if (len > (q->max_hw_sectors << 9))
2443 return -EINVAL;
2444 if (!len || !ubuf)
2445 return -EINVAL;
2446
2447 while (bytes_read != len) {
2448 unsigned long map_len, end, start;
2449
2450 map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE);
2451 end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1)
2452 >> PAGE_SHIFT;
2453 start = (unsigned long)ubuf >> PAGE_SHIFT;
2454
2455
2456
2457
2458
2459
2460 if (end - start > BIO_MAX_PAGES)
2461 map_len -= PAGE_SIZE;
2462
2463 ret = __blk_rq_map_user(q, rq, ubuf, map_len);
2464 if (ret < 0)
2465 goto unmap_rq;
2466 if (!bio)
2467 bio = rq->bio;
2468 bytes_read += ret;
2469 ubuf += ret;
2470 }
2471
2472 rq->buffer = rq->data = NULL;
2473 return 0;
2474unmap_rq:
2475 blk_rq_unmap_user(bio);
2476 return ret;
2477}
2478
2479EXPORT_SYMBOL(blk_rq_map_user);
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
2503 struct sg_iovec *iov, int iov_count, unsigned int len)
2504{
2505 struct bio *bio;
2506
2507 if (!iov || iov_count <= 0)
2508 return -EINVAL;
2509
2510
2511
2512
2513 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
2514 if (IS_ERR(bio))
2515 return PTR_ERR(bio);
2516
2517 if (bio->bi_size != len) {
2518 bio_endio(bio, 0);
2519 bio_unmap_user(bio);
2520 return -EINVAL;
2521 }
2522
2523 bio_get(bio);
2524 blk_rq_bio_prep(q, rq, bio);
2525 rq->buffer = rq->data = NULL;
2526 return 0;
2527}
2528
2529EXPORT_SYMBOL(blk_rq_map_user_iov);
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540int blk_rq_unmap_user(struct bio *bio)
2541{
2542 struct bio *mapped_bio;
2543 int ret = 0, ret2;
2544
2545 while (bio) {
2546 mapped_bio = bio;
2547 if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
2548 mapped_bio = bio->bi_private;
2549
2550 ret2 = __blk_rq_unmap_user(mapped_bio);
2551 if (ret2 && !ret)
2552 ret = ret2;
2553
2554 mapped_bio = bio;
2555 bio = bio->bi_next;
2556 bio_put(mapped_bio);
2557 }
2558
2559 return ret;
2560}
2561
2562EXPORT_SYMBOL(blk_rq_unmap_user);
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
2573 unsigned int len, gfp_t gfp_mask)
2574{
2575 struct bio *bio;
2576
2577 if (len > (q->max_hw_sectors << 9))
2578 return -EINVAL;
2579 if (!len || !kbuf)
2580 return -EINVAL;
2581
2582 bio = bio_map_kern(q, kbuf, len, gfp_mask);
2583 if (IS_ERR(bio))
2584 return PTR_ERR(bio);
2585
2586 if (rq_data_dir(rq) == WRITE)
2587 bio->bi_rw |= (1 << BIO_RW);
2588
2589 blk_rq_bio_prep(q, rq, bio);
2590 blk_queue_bounce(q, &rq->bio);
2591 rq->buffer = rq->data = NULL;
2592 return 0;
2593}
2594
2595EXPORT_SYMBOL(blk_rq_map_kern);
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
2610 struct request *rq, int at_head,
2611 rq_end_io_fn *done)
2612{
2613 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2614
2615 rq->rq_disk = bd_disk;
2616 rq->cmd_flags |= REQ_NOMERGE;
2617 rq->end_io = done;
2618 WARN_ON(irqs_disabled());
2619 spin_lock_irq(q->queue_lock);
2620 __elv_add_request(q, rq, where, 1);
2621 __generic_unplug_device(q);
2622 spin_unlock_irq(q->queue_lock);
2623}
2624EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
2638 struct request *rq, int at_head)
2639{
2640 DECLARE_COMPLETION_ONSTACK(wait);
2641 char sense[SCSI_SENSE_BUFFERSIZE];
2642 int err = 0;
2643
2644
2645
2646
2647
2648 rq->ref_count++;
2649
2650 if (!rq->sense) {
2651 memset(sense, 0, sizeof(sense));
2652 rq->sense = sense;
2653 rq->sense_len = 0;
2654 }
2655
2656 rq->end_io_data = &wait;
2657 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2658 wait_for_completion(&wait);
2659
2660 if (rq->errors)
2661 err = -EIO;
2662
2663 return err;
2664}
2665
2666EXPORT_SYMBOL(blk_execute_rq);
2667
2668static void bio_end_empty_barrier(struct bio *bio, int err)
2669{
2670 if (err)
2671 clear_bit(BIO_UPTODATE, &bio->bi_flags);
2672
2673 complete(bio->bi_private);
2674}
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2687{
2688 DECLARE_COMPLETION_ONSTACK(wait);
2689 struct request_queue *q;
2690 struct bio *bio;
2691 int ret;
2692
2693 if (bdev->bd_disk == NULL)
2694 return -ENXIO;
2695
2696 q = bdev_get_queue(bdev);
2697 if (!q)
2698 return -ENXIO;
2699
2700 bio = bio_alloc(GFP_KERNEL, 0);
2701 if (!bio)
2702 return -ENOMEM;
2703
2704 bio->bi_end_io = bio_end_empty_barrier;
2705 bio->bi_private = &wait;
2706 bio->bi_bdev = bdev;
2707 submit_bio(1 << BIO_RW_BARRIER, bio);
2708
2709 wait_for_completion(&wait);
2710
2711
2712
2713
2714
2715
2716 if (error_sector)
2717 *error_sector = bio->bi_sector;
2718
2719 ret = 0;
2720 if (!bio_flagged(bio, BIO_UPTODATE))
2721 ret = -EIO;
2722
2723 bio_put(bio);
2724 return ret;
2725}
2726
2727EXPORT_SYMBOL(blkdev_issue_flush);
2728
2729static void drive_stat_acct(struct request *rq, int new_io)
2730{
2731 int rw = rq_data_dir(rq);
2732
2733 if (!blk_fs_request(rq) || !rq->rq_disk)
2734 return;
2735
2736 if (!new_io) {
2737 __disk_stat_inc(rq->rq_disk, merges[rw]);
2738 } else {
2739 disk_round_stats(rq->rq_disk);
2740 rq->rq_disk->in_flight++;
2741 }
2742}
2743
2744
2745
2746
2747
2748
2749static inline void add_request(struct request_queue * q, struct request * req)
2750{
2751 drive_stat_acct(req, 1);
2752
2753
2754
2755
2756
2757 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2758}
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775void disk_round_stats(struct gendisk *disk)
2776{
2777 unsigned long now = jiffies;
2778
2779 if (now == disk->stamp)
2780 return;
2781
2782 if (disk->in_flight) {
2783 __disk_stat_add(disk, time_in_queue,
2784 disk->in_flight * (now - disk->stamp));
2785 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2786 }
2787 disk->stamp = now;
2788}
2789
2790EXPORT_SYMBOL_GPL(disk_round_stats);
2791
2792
2793
2794
2795void __blk_put_request(struct request_queue *q, struct request *req)
2796{
2797 if (unlikely(!q))
2798 return;
2799 if (unlikely(--req->ref_count))
2800 return;
2801
2802 elv_completed_request(q, req);
2803
2804
2805
2806
2807
2808 if (req->cmd_flags & REQ_ALLOCED) {
2809 int rw = rq_data_dir(req);
2810 int priv = req->cmd_flags & REQ_ELVPRIV;
2811
2812 BUG_ON(!list_empty(&req->queuelist));
2813 BUG_ON(!hlist_unhashed(&req->hash));
2814
2815 blk_free_request(q, req);
2816 freed_request(q, rw, priv);
2817 }
2818}
2819
2820EXPORT_SYMBOL_GPL(__blk_put_request);
2821
2822void blk_put_request(struct request *req)
2823{
2824 unsigned long flags;
2825 struct request_queue *q = req->q;
2826
2827
2828
2829
2830
2831 if (q) {
2832 spin_lock_irqsave(q->queue_lock, flags);
2833 __blk_put_request(q, req);
2834 spin_unlock_irqrestore(q->queue_lock, flags);
2835 }
2836}
2837
2838EXPORT_SYMBOL(blk_put_request);
2839
2840
2841
2842
2843
2844
2845void blk_end_sync_rq(struct request *rq, int error)
2846{
2847 struct completion *waiting = rq->end_io_data;
2848
2849 rq->end_io_data = NULL;
2850 __blk_put_request(rq->q, rq);
2851
2852
2853
2854
2855
2856 complete(waiting);
2857}
2858EXPORT_SYMBOL(blk_end_sync_rq);
2859
2860
2861
2862
2863static int attempt_merge(struct request_queue *q, struct request *req,
2864 struct request *next)
2865{
2866 if (!rq_mergeable(req) || !rq_mergeable(next))
2867 return 0;
2868
2869
2870
2871
2872 if (req->sector + req->nr_sectors != next->sector)
2873 return 0;
2874
2875 if (rq_data_dir(req) != rq_data_dir(next)
2876 || req->rq_disk != next->rq_disk
2877 || next->special)
2878 return 0;
2879
2880
2881
2882
2883
2884
2885
2886 if (!ll_merge_requests_fn(q, req, next))
2887 return 0;
2888
2889
2890
2891
2892
2893
2894
2895 if (time_after(req->start_time, next->start_time))
2896 req->start_time = next->start_time;
2897
2898 req->biotail->bi_next = next->bio;
2899 req->biotail = next->biotail;
2900
2901 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2902
2903 elv_merge_requests(q, req, next);
2904
2905 if (req->rq_disk) {
2906 disk_round_stats(req->rq_disk);
2907 req->rq_disk->in_flight--;
2908 }
2909
2910 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
2911
2912 __blk_put_request(q, next);
2913 return 1;
2914}
2915
/*
 * Try to merge the request following @rq (as reported by the elevator)
 * into @rq. Returns 1 on merge, 0 otherwise.
 */
static inline int attempt_back_merge(struct request_queue *q,
				     struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	return next ? attempt_merge(q, rq, next) : 0;
}
2926
/*
 * Try to merge @rq into the request preceding it (as reported by the
 * elevator). Returns 1 on merge, 0 otherwise.
 */
static inline int attempt_front_merge(struct request_queue *q,
				      struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	return prev ? attempt_merge(q, prev, rq) : 0;
}
2937
2938static void init_request_from_bio(struct request *req, struct bio *bio)
2939{
2940 req->cmd_type = REQ_TYPE_FS;
2941
2942
2943
2944
2945 if (bio_rw_ahead(bio) || bio_failfast(bio))
2946 req->cmd_flags |= REQ_FAILFAST;
2947
2948
2949
2950
2951 if (unlikely(bio_barrier(bio)))
2952 req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2953
2954 if (bio_sync(bio))
2955 req->cmd_flags |= REQ_RW_SYNC;
2956 if (bio_rw_meta(bio))
2957 req->cmd_flags |= REQ_RW_META;
2958
2959 req->errors = 0;
2960 req->hard_sector = req->sector = bio->bi_sector;
2961 req->ioprio = bio_prio(bio);
2962 req->start_time = jiffies;
2963 blk_rq_bio_prep(req->q, req, bio);
2964}
2965
2966static int __make_request(struct request_queue *q, struct bio *bio)
2967{
2968 struct request *req;
2969 int el_ret, nr_sectors, barrier, err;
2970 const unsigned short prio = bio_prio(bio);
2971 const int sync = bio_sync(bio);
2972 int rw_flags;
2973
2974 nr_sectors = bio_sectors(bio);
2975
2976
2977
2978
2979
2980
2981 blk_queue_bounce(q, &bio);
2982
2983 barrier = bio_barrier(bio);
2984 if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
2985 err = -EOPNOTSUPP;
2986 goto end_io;
2987 }
2988
2989 spin_lock_irq(q->queue_lock);
2990
2991 if (unlikely(barrier) || elv_queue_empty(q))
2992 goto get_rq;
2993
2994 el_ret = elv_merge(q, &req, bio);
2995 switch (el_ret) {
2996 case ELEVATOR_BACK_MERGE:
2997 BUG_ON(!rq_mergeable(req));
2998
2999 if (!ll_back_merge_fn(q, req, bio))
3000 break;
3001
3002 blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
3003
3004 req->biotail->bi_next = bio;
3005 req->biotail = bio;
3006 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
3007 req->ioprio = ioprio_best(req->ioprio, prio);
3008 drive_stat_acct(req, 0);
3009 if (!attempt_back_merge(q, req))
3010 elv_merged_request(q, req, el_ret);
3011 goto out;
3012
3013 case ELEVATOR_FRONT_MERGE:
3014 BUG_ON(!rq_mergeable(req));
3015
3016 if (!ll_front_merge_fn(q, req, bio))
3017 break;
3018
3019 blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
3020
3021 bio->bi_next = req->bio;
3022 req->bio = bio;
3023
3024
3025
3026
3027
3028
3029 req->buffer = bio_data(bio);
3030 req->current_nr_sectors = bio_cur_sectors(bio);
3031 req->hard_cur_sectors = req->current_nr_sectors;
3032 req->sector = req->hard_sector = bio->bi_sector;
3033 req->nr_sectors = req->hard_nr_sectors += nr_sectors;
3034 req->ioprio = ioprio_best(req->ioprio, prio);
3035 drive_stat_acct(req, 0);
3036 if (!attempt_front_merge(q, req))
3037 elv_merged_request(q, req, el_ret);
3038 goto out;
3039
3040
3041 default:
3042 ;
3043 }
3044
3045get_rq:
3046
3047
3048
3049
3050
3051 rw_flags = bio_data_dir(bio);
3052 if (sync)
3053 rw_flags |= REQ_RW_SYNC;
3054
3055
3056
3057
3058
3059 req = get_request_wait(q, rw_flags, bio);
3060
3061
3062
3063
3064
3065
3066
3067 init_request_from_bio(req, bio);
3068
3069 spin_lock_irq(q->queue_lock);
3070 if (elv_queue_empty(q))
3071 blk_plug_device(q);
3072 add_request(q, req);
3073out:
3074 if (sync)
3075 __generic_unplug_device(q);
3076
3077 spin_unlock_irq(q->queue_lock);
3078 return 0;
3079
3080end_io:
3081 bio_endio(bio, err);
3082 return 0;
3083}
3084
3085
3086
3087
3088static inline void blk_partition_remap(struct bio *bio)
3089{
3090 struct block_device *bdev = bio->bi_bdev;
3091
3092 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
3093 struct hd_struct *p = bdev->bd_part;
3094 const int rw = bio_data_dir(bio);
3095
3096 p->sectors[rw] += bio_sectors(bio);
3097 p->ios[rw]++;
3098
3099 bio->bi_sector += p->start_sect;
3100 bio->bi_bdev = bdev->bd_contains;
3101
3102 blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
3103 bdev->bd_dev, bio->bi_sector,
3104 bio->bi_sector - p->start_sect);
3105 }
3106}
3107
3108static void handle_bad_sector(struct bio *bio)
3109{
3110 char b[BDEVNAME_SIZE];
3111
3112 printk(KERN_INFO "attempt to access beyond end of device\n");
3113 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
3114 bdevname(bio->bi_bdev, b),
3115 bio->bi_rw,
3116 (unsigned long long)bio->bi_sector + bio_sectors(bio),
3117 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
3118
3119 set_bit(BIO_EOF, &bio->bi_flags);
3120}
3121
#ifdef CONFIG_FAIL_MAKE_REQUEST

/* Fault-injection attributes controlling make_request failures. */
static DECLARE_FAULT_ATTR(fail_make_request);

/* Parse the "fail_make_request=" boot parameter. */
static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

/*
 * Decide whether @bio should be failed artificially. Injection is only
 * considered when the disk has GENHD_FL_FAIL set or the partition has
 * make_it_fail set; otherwise 0 is returned.
 */
static int should_fail_request(struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;

	if (!(bdev->bd_disk->flags & GENHD_FL_FAIL) &&
	    !(bdev->bd_part && bdev->bd_part->make_it_fail))
		return 0;

	return should_fail(&fail_make_request, bio->bi_size);
}

/* Create the "fail_make_request" fault-attribute dentries. */
static int __init fail_make_request_debugfs(void)
{
	return init_fault_attr_dentries(&fail_make_request,
					"fail_make_request");
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

/* Fault injection compiled out: never fail a request artificially. */
static inline int should_fail_request(struct bio *bio)
{
	return 0;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */
3157
3158
3159
3160
3161static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
3162{
3163 sector_t maxsector;
3164
3165 if (!nr_sectors)
3166 return 0;
3167
3168
3169 maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
3170 if (maxsector) {
3171 sector_t sector = bio->bi_sector;
3172
3173 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
3174
3175
3176
3177
3178
3179 handle_bad_sector(bio);
3180 return 1;
3181 }
3182 }
3183
3184 return 0;
3185}
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211static inline void __generic_make_request(struct bio *bio)
3212{
3213 struct request_queue *q;
3214 sector_t old_sector;
3215 int ret, nr_sectors = bio_sectors(bio);
3216 dev_t old_dev;
3217 int err = -EIO;
3218
3219 might_sleep();
3220
3221 if (bio_check_eod(bio, nr_sectors))
3222 goto end_io;
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232 old_sector = -1;
3233 old_dev = 0;
3234 do {
3235 char b[BDEVNAME_SIZE];
3236
3237 q = bdev_get_queue(bio->bi_bdev);
3238 if (!q) {
3239 printk(KERN_ERR
3240 "generic_make_request: Trying to access "
3241 "nonexistent block-device %s (%Lu)\n",
3242 bdevname(bio->bi_bdev, b),
3243 (long long) bio->bi_sector);
3244end_io:
3245 bio_endio(bio, err);
3246 break;
3247 }
3248
3249 if (unlikely(nr_sectors > q->max_hw_sectors)) {
3250 printk("bio too big device %s (%u > %u)\n",
3251 bdevname(bio->bi_bdev, b),
3252 bio_sectors(bio),
3253 q->max_hw_sectors);
3254 goto end_io;
3255 }
3256
3257 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
3258 goto end_io;
3259
3260 if (should_fail_request(bio))
3261 goto end_io;
3262
3263
3264
3265
3266
3267 blk_partition_remap(bio);
3268
3269 if (old_sector != -1)
3270 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
3271 old_sector);
3272
3273 blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
3274
3275 old_sector = bio->bi_sector;
3276 old_dev = bio->bi_bdev->bd_dev;
3277
3278 if (bio_check_eod(bio, nr_sectors))
3279 goto end_io;
3280 if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
3281 err = -EOPNOTSUPP;
3282 goto end_io;
3283 }
3284
3285 ret = q->make_request_fn(q, bio);
3286 } while (ret);
3287}
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300void generic_make_request(struct bio *bio)
3301{
3302 if (current->bio_tail) {
3303
3304 *(current->bio_tail) = bio;
3305 bio->bi_next = NULL;
3306 current->bio_tail = &bio->bi_next;
3307 return;
3308 }
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327 BUG_ON(bio->bi_next);
3328 do {
3329 current->bio_list = bio->bi_next;
3330 if (bio->bi_next == NULL)
3331 current->bio_tail = ¤t->bio_list;
3332 else
3333 bio->bi_next = NULL;
3334 __generic_make_request(bio);
3335 bio = current->bio_list;
3336 } while (bio);
3337 current->bio_tail = NULL;
3338}
3339
3340EXPORT_SYMBOL(generic_make_request);
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352void submit_bio(int rw, struct bio *bio)
3353{
3354 int count = bio_sectors(bio);
3355
3356 bio->bi_rw |= rw;
3357
3358
3359
3360
3361
3362 if (!bio_empty_barrier(bio)) {
3363
3364 BIO_BUG_ON(!bio->bi_size);
3365 BIO_BUG_ON(!bio->bi_io_vec);
3366
3367 if (rw & WRITE) {
3368 count_vm_events(PGPGOUT, count);
3369 } else {
3370 task_io_account_read(bio->bi_size);
3371 count_vm_events(PGPGIN, count);
3372 }
3373
3374 if (unlikely(block_dump)) {
3375 char b[BDEVNAME_SIZE];
3376 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3377 current->comm, task_pid_nr(current),
3378 (rw & WRITE) ? "WRITE" : "READ",
3379 (unsigned long long)bio->bi_sector,
3380 bdevname(bio->bi_bdev,b));
3381 }
3382 }
3383
3384 generic_make_request(bio);
3385}
3386
3387EXPORT_SYMBOL(submit_bio);
3388
3389static void blk_recalc_rq_sectors(struct request *rq, int nsect)
3390{
3391 if (blk_fs_request(rq)) {
3392 rq->hard_sector += nsect;
3393 rq->hard_nr_sectors -= nsect;
3394
3395
3396
3397
3398 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
3399 (rq->sector <= rq->hard_sector)) {
3400 rq->sector = rq->hard_sector;
3401 rq->nr_sectors = rq->hard_nr_sectors;
3402 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
3403 rq->current_nr_sectors = rq->hard_cur_sectors;
3404 rq->buffer = bio_data(rq->bio);
3405 }
3406
3407
3408
3409
3410
3411 if (rq->nr_sectors < rq->current_nr_sectors) {
3412 printk("blk: request botched\n");
3413 rq->nr_sectors = rq->current_nr_sectors;
3414 }
3415 }
3416}
3417
3418static int __end_that_request_first(struct request *req, int uptodate,
3419 int nr_bytes)
3420{
3421 int total_bytes, bio_nbytes, error, next_idx = 0;
3422 struct bio *bio;
3423
3424 blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
3425
3426
3427
3428
3429 error = 0;
3430 if (end_io_error(uptodate))
3431 error = !uptodate ? -EIO : uptodate;
3432
3433
3434
3435
3436
3437 if (!blk_pc_request(req))
3438 req->errors = 0;
3439
3440 if (!uptodate) {
3441 if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
3442 printk("end_request: I/O error, dev %s, sector %llu\n",
3443 req->rq_disk ? req->rq_disk->disk_name : "?",
3444 (unsigned long long)req->sector);
3445 }
3446
3447 if (blk_fs_request(req) && req->rq_disk) {
3448 const int rw = rq_data_dir(req);
3449
3450 disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
3451 }
3452
3453 total_bytes = bio_nbytes = 0;
3454 while ((bio = req->bio) != NULL) {
3455 int nbytes;
3456
3457
3458
3459
3460
3461
3462 if (blk_empty_barrier(req))
3463 bio->bi_sector = req->sector;
3464
3465 if (nr_bytes >= bio->bi_size) {
3466 req->bio = bio->bi_next;
3467 nbytes = bio->bi_size;
3468 req_bio_endio(req, bio, nbytes, error);
3469 next_idx = 0;
3470 bio_nbytes = 0;
3471 } else {
3472 int idx = bio->bi_idx + next_idx;
3473
3474 if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
3475 blk_dump_rq_flags(req, "__end_that");
3476 printk("%s: bio idx %d >= vcnt %d\n",
3477 __FUNCTION__,
3478 bio->bi_idx, bio->bi_vcnt);
3479 break;
3480 }
3481
3482 nbytes = bio_iovec_idx(bio, idx)->bv_len;
3483 BIO_BUG_ON(nbytes > bio->bi_size);
3484
3485
3486
3487
3488 if (unlikely(nbytes > nr_bytes)) {
3489 bio_nbytes += nr_bytes;
3490 total_bytes += nr_bytes;
3491 break;
3492 }
3493
3494
3495
3496
3497 next_idx++;
3498 bio_nbytes += nbytes;
3499 }
3500
3501 total_bytes += nbytes;
3502 nr_bytes -= nbytes;
3503
3504 if ((bio = req->bio)) {
3505
3506
3507
3508 if (unlikely(nr_bytes <= 0))
3509 break;
3510 }
3511 }
3512
3513
3514
3515
3516 if (!req->bio)
3517 return 0;
3518
3519
3520
3521
3522 if (bio_nbytes) {
3523 req_bio_endio(req, bio, bio_nbytes, error);
3524 bio->bi_idx += next_idx;
3525 bio_iovec(bio)->bv_offset += nr_bytes;
3526 bio_iovec(bio)->bv_len -= nr_bytes;
3527 }
3528
3529 blk_recalc_rq_sectors(req, total_bytes >> 9);
3530 blk_recalc_rq_segments(req);
3531 return 1;
3532}
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
/*
 * Sector-based front end to __end_that_request_first(): completes
 * @nr_sectors 512-byte sectors of @req.
 *
 * Returns 0 when the request has no bios left, 1 when data remains.
 */
int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
{
	const int nr_bytes = nr_sectors << 9;

	return __end_that_request_first(req, uptodate, nr_bytes);
}
3552
3553EXPORT_SYMBOL(end_that_request_first);
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
/*
 * Byte-based front end to __end_that_request_first(): completes
 * @nr_bytes bytes of @req.
 *
 * Returns 0 when the request has no bios left, 1 when data remains.
 */
int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
{
	int more = __end_that_request_first(req, uptodate, nr_bytes);

	return more;
}
3574
3575EXPORT_SYMBOL(end_that_request_chunk);
3576
3577
3578
3579
3580
3581static void blk_done_softirq(struct softirq_action *h)
3582{
3583 struct list_head *cpu_list, local_list;
3584
3585 local_irq_disable();
3586 cpu_list = &__get_cpu_var(blk_cpu_done);
3587 list_replace_init(cpu_list, &local_list);
3588 local_irq_enable();
3589
3590 while (!list_empty(&local_list)) {
3591 struct request *rq = list_entry(local_list.next, struct request, donelist);
3592
3593 list_del_init(&rq->donelist);
3594 rq->q->softirq_done_fn(rq);
3595 }
3596}
3597
3598static int __cpuinit blk_cpu_notify(struct notifier_block *self, unsigned long action,
3599 void *hcpu)
3600{
3601
3602
3603
3604
3605 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
3606 int cpu = (unsigned long) hcpu;
3607
3608 local_irq_disable();
3609 list_splice_init(&per_cpu(blk_cpu_done, cpu),
3610 &__get_cpu_var(blk_cpu_done));
3611 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3612 local_irq_enable();
3613 }
3614
3615 return NOTIFY_OK;
3616}
3617
3618
3619static struct notifier_block blk_cpu_notifier __cpuinitdata = {
3620 .notifier_call = blk_cpu_notify,
3621};
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635void blk_complete_request(struct request *req)
3636{
3637 struct list_head *cpu_list;
3638 unsigned long flags;
3639
3640 BUG_ON(!req->q->softirq_done_fn);
3641
3642 local_irq_save(flags);
3643
3644 cpu_list = &__get_cpu_var(blk_cpu_done);
3645 list_add_tail(&req->donelist, cpu_list);
3646 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3647
3648 local_irq_restore(flags);
3649}
3650
3651EXPORT_SYMBOL(blk_complete_request);
3652
3653
3654
3655
3656void end_that_request_last(struct request *req, int uptodate)
3657{
3658 struct gendisk *disk = req->rq_disk;
3659 int error;
3660
3661
3662
3663
3664 error = 0;
3665 if (end_io_error(uptodate))
3666 error = !uptodate ? -EIO : uptodate;
3667
3668 if (unlikely(laptop_mode) && blk_fs_request(req))
3669 laptop_io_completion();
3670
3671
3672
3673
3674
3675
3676 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
3677 unsigned long duration = jiffies - req->start_time;
3678 const int rw = rq_data_dir(req);
3679
3680 __disk_stat_inc(disk, ios[rw]);
3681 __disk_stat_add(disk, ticks[rw], duration);
3682 disk_round_stats(disk);
3683 disk->in_flight--;
3684 }
3685 if (req->end_io)
3686 req->end_io(req, error);
3687 else
3688 __blk_put_request(req->q, req);
3689}
3690
3691EXPORT_SYMBOL(end_that_request_last);
3692
3693static inline void __end_request(struct request *rq, int uptodate,
3694 unsigned int nr_bytes, int dequeue)
3695{
3696 if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
3697 if (dequeue)
3698 blkdev_dequeue_request(rq);
3699 add_disk_randomness(rq->rq_disk);
3700 end_that_request_last(rq, uptodate);
3701 }
3702}
3703
3704static unsigned int rq_byte_size(struct request *rq)
3705{
3706 if (blk_fs_request(rq))
3707 return rq->hard_nr_sectors << 9;
3708
3709 return rq->data_len;
3710}
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
/*
 * Complete all of @rq (rq_byte_size() bytes) and, once nothing is left,
 * dequeue it before the final completion step.
 */
void end_queued_request(struct request *rq, int uptodate)
{
	const unsigned int nr_bytes = rq_byte_size(rq);

	__end_request(rq, uptodate, nr_bytes, 1);
}
3727EXPORT_SYMBOL(end_queued_request);
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
/*
 * Complete all of @rq (rq_byte_size() bytes) without dequeueing it;
 * __end_request() is called with dequeue == 0.
 */
void end_dequeued_request(struct request *rq, int uptodate)
{
	const unsigned int nr_bytes = rq_byte_size(rq);

	__end_request(rq, uptodate, nr_bytes, 0);
}
3744EXPORT_SYMBOL(end_dequeued_request);
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766void end_request(struct request *req, int uptodate)
3767{
3768 __end_request(req, uptodate, req->hard_cur_sectors << 9, 1);
3769}
3770EXPORT_SYMBOL(end_request);
3771
3772static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
3773 struct bio *bio)
3774{
3775
3776 rq->cmd_flags |= (bio->bi_rw & 3);
3777
3778 rq->nr_phys_segments = bio_phys_segments(q, bio);
3779 rq->nr_hw_segments = bio_hw_segments(q, bio);
3780 rq->current_nr_sectors = bio_cur_sectors(bio);
3781 rq->hard_cur_sectors = rq->current_nr_sectors;
3782 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3783 rq->buffer = bio_data(bio);
3784 rq->data_len = bio->bi_size;
3785
3786 rq->bio = rq->biotail = bio;
3787
3788 if (bio->bi_bdev)
3789 rq->rq_disk = bio->bi_bdev->bd_disk;
3790}
3791
3792int kblockd_schedule_work(struct work_struct *work)
3793{
3794 return queue_work(kblockd_workqueue, work);
3795}
3796
3797EXPORT_SYMBOL(kblockd_schedule_work);
3798
/*
 * Despite the "flush" in its name, this hands @work to
 * cancel_work_sync().
 */
void kblockd_flush_work(struct work_struct *work)
{
	cancel_work_sync(work);
}
3803EXPORT_SYMBOL(kblockd_flush_work);
3804
3805int __init blk_dev_init(void)
3806{
3807 int i;
3808
3809 kblockd_workqueue = create_workqueue("kblockd");
3810 if (!kblockd_workqueue)
3811 panic("Failed to create kblockd\n");
3812
3813 request_cachep = kmem_cache_create("blkdev_requests",
3814 sizeof(struct request), 0, SLAB_PANIC, NULL);
3815
3816 requestq_cachep = kmem_cache_create("blkdev_queue",
3817 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
3818
3819 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3820 sizeof(struct io_context), 0, SLAB_PANIC, NULL);
3821
3822 for_each_possible_cpu(i)
3823 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3824
3825 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3826 register_hotcpu_notifier(&blk_cpu_notifier);
3827
3828 blk_max_low_pfn = max_low_pfn - 1;
3829 blk_max_pfn = max_pfn - 1;
3830
3831 return 0;
3832}
3833
3834
3835
3836
3837void put_io_context(struct io_context *ioc)
3838{
3839 if (ioc == NULL)
3840 return;
3841
3842 BUG_ON(atomic_read(&ioc->refcount) == 0);
3843
3844 if (atomic_dec_and_test(&ioc->refcount)) {
3845 struct cfq_io_context *cic;
3846
3847 rcu_read_lock();
3848 if (ioc->aic && ioc->aic->dtor)
3849 ioc->aic->dtor(ioc->aic);
3850 if (ioc->cic_root.rb_node != NULL) {
3851 struct rb_node *n = rb_first(&ioc->cic_root);
3852
3853 cic = rb_entry(n, struct cfq_io_context, rb_node);
3854 cic->dtor(ioc);
3855 }
3856 rcu_read_unlock();
3857
3858 kmem_cache_free(iocontext_cachep, ioc);
3859 }
3860}
3861EXPORT_SYMBOL(put_io_context);
3862
3863
3864void exit_io_context(void)
3865{
3866 struct io_context *ioc;
3867 struct cfq_io_context *cic;
3868
3869 task_lock(current);
3870 ioc = current->io_context;
3871 current->io_context = NULL;
3872 task_unlock(current);
3873
3874 ioc->task = NULL;
3875 if (ioc->aic && ioc->aic->exit)
3876 ioc->aic->exit(ioc->aic);
3877 if (ioc->cic_root.rb_node != NULL) {
3878 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3879 cic->exit(ioc);
3880 }
3881
3882 put_io_context(ioc);
3883}
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893static struct io_context *current_io_context(gfp_t gfp_flags, int node)
3894{
3895 struct task_struct *tsk = current;
3896 struct io_context *ret;
3897
3898 ret = tsk->io_context;
3899 if (likely(ret))
3900 return ret;
3901
3902 ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node);
3903 if (ret) {
3904 atomic_set(&ret->refcount, 1);
3905 ret->task = current;
3906 ret->ioprio_changed = 0;
3907 ret->last_waited = jiffies;
3908 ret->nr_batch_requests = 0;
3909 ret->aic = NULL;
3910 ret->cic_root.rb_node = NULL;
3911 ret->ioc_data = NULL;
3912
3913 smp_wmb();
3914 tsk->io_context = ret;
3915 }
3916
3917 return ret;
3918}
3919
3920
3921
3922
3923
3924
3925
3926struct io_context *get_io_context(gfp_t gfp_flags, int node)
3927{
3928 struct io_context *ret;
3929 ret = current_io_context(gfp_flags, node);
3930 if (likely(ret))
3931 atomic_inc(&ret->refcount);
3932 return ret;
3933}
3934EXPORT_SYMBOL(get_io_context);
3935
3936void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3937{
3938 struct io_context *src = *psrc;
3939 struct io_context *dst = *pdst;
3940
3941 if (src) {
3942 BUG_ON(atomic_read(&src->refcount) == 0);
3943 atomic_inc(&src->refcount);
3944 put_io_context(dst);
3945 *pdst = src;
3946 }
3947}
3948EXPORT_SYMBOL(copy_io_context);
3949
/*
 * Exchange the two io_context pointers stored at @ioc1 and @ioc2.
 * Reference counts are not touched.
 */
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
	struct io_context *first = *ioc1;
	struct io_context *second = *ioc2;

	*ioc1 = second;
	*ioc2 = first;
}
3957EXPORT_SYMBOL(swap_io_context);
3958
3959
3960
3961
3962struct queue_sysfs_entry {
3963 struct attribute attr;
3964 ssize_t (*show)(struct request_queue *, char *);
3965 ssize_t (*store)(struct request_queue *, const char *, size_t);
3966};
3967
/*
 * Format @var as a decimal number followed by a newline into @page.
 *
 * @var is unsigned, so it is printed with %u; with %d values above
 * INT_MAX would be shown as negative numbers.
 *
 * Returns the number of characters written (as reported by sprintf()).
 */
static ssize_t
queue_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%u\n", var);
}
3973
3974static ssize_t
3975queue_var_store(unsigned long *var, const char *page, size_t count)
3976{
3977 char *p = (char *) page;
3978
3979 *var = simple_strtoul(p, &p, 10);
3980 return count;
3981}
3982
3983static ssize_t queue_requests_show(struct request_queue *q, char *page)
3984{
3985 return queue_var_show(q->nr_requests, (page));
3986}
3987
3988static ssize_t
3989queue_requests_store(struct request_queue *q, const char *page, size_t count)
3990{
3991 struct request_list *rl = &q->rq;
3992 unsigned long nr;
3993 int ret = queue_var_store(&nr, page, count);
3994 if (nr < BLKDEV_MIN_RQ)
3995 nr = BLKDEV_MIN_RQ;
3996
3997 spin_lock_irq(q->queue_lock);
3998 q->nr_requests = nr;
3999 blk_queue_congestion_threshold(q);
4000
4001 if (rl->count[READ] >= queue_congestion_on_threshold(q))
4002 blk_set_queue_congested(q, READ);
4003 else if (rl->count[READ] < queue_congestion_off_threshold(q))
4004 blk_clear_queue_congested(q, READ);
4005
4006 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
4007 blk_set_queue_congested(q, WRITE);
4008 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
4009 blk_clear_queue_congested(q, WRITE);
4010
4011 if (rl->count[READ] >= q->nr_requests) {
4012 blk_set_queue_full(q, READ);
4013 } else if (rl->count[READ]+1 <= q->nr_requests) {
4014 blk_clear_queue_full(q, READ);
4015 wake_up(&rl->wait[READ]);
4016 }
4017
4018 if (rl->count[WRITE] >= q->nr_requests) {
4019 blk_set_queue_full(q, WRITE);
4020 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
4021 blk_clear_queue_full(q, WRITE);
4022 wake_up(&rl->wait[WRITE]);
4023 }
4024 spin_unlock_irq(q->queue_lock);
4025 return ret;
4026}
4027
4028static ssize_t queue_ra_show(struct request_queue *q, char *page)
4029{
4030 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
4031
4032 return queue_var_show(ra_kb, (page));
4033}
4034
4035static ssize_t
4036queue_ra_store(struct request_queue *q, const char *page, size_t count)
4037{
4038 unsigned long ra_kb;
4039 ssize_t ret = queue_var_store(&ra_kb, page, count);
4040
4041 spin_lock_irq(q->queue_lock);
4042 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
4043 spin_unlock_irq(q->queue_lock);
4044
4045 return ret;
4046}
4047
4048static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
4049{
4050 int max_sectors_kb = q->max_sectors >> 1;
4051
4052 return queue_var_show(max_sectors_kb, (page));
4053}
4054
4055static ssize_t
4056queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
4057{
4058 unsigned long max_sectors_kb,
4059 max_hw_sectors_kb = q->max_hw_sectors >> 1,
4060 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
4061 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
4062
4063 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
4064 return -EINVAL;
4065
4066
4067
4068
4069 spin_lock_irq(q->queue_lock);
4070 q->max_sectors = max_sectors_kb << 1;
4071 spin_unlock_irq(q->queue_lock);
4072
4073 return ret;
4074}
4075
4076static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
4077{
4078 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
4079
4080 return queue_var_show(max_hw_sectors_kb, (page));
4081}
4082
4083
4084static struct queue_sysfs_entry queue_requests_entry = {
4085 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
4086 .show = queue_requests_show,
4087 .store = queue_requests_store,
4088};
4089
4090static struct queue_sysfs_entry queue_ra_entry = {
4091 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
4092 .show = queue_ra_show,
4093 .store = queue_ra_store,
4094};
4095
4096static struct queue_sysfs_entry queue_max_sectors_entry = {
4097 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
4098 .show = queue_max_sectors_show,
4099 .store = queue_max_sectors_store,
4100};
4101
4102static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
4103 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
4104 .show = queue_max_hw_sectors_show,
4105};
4106
4107static struct queue_sysfs_entry queue_iosched_entry = {
4108 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
4109 .show = elv_iosched_show,
4110 .store = elv_iosched_store,
4111};
4112
4113static struct attribute *default_attrs[] = {
4114 &queue_requests_entry.attr,
4115 &queue_ra_entry.attr,
4116 &queue_max_hw_sectors_entry.attr,
4117 &queue_max_sectors_entry.attr,
4118 &queue_iosched_entry.attr,
4119 NULL,
4120};
4121
/* Map an embedded struct attribute pointer back to its queue_sysfs_entry. */
#define to_queue(qattr) \
	container_of((qattr), struct queue_sysfs_entry, attr)
4123
4124static ssize_t
4125queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4126{
4127 struct queue_sysfs_entry *entry = to_queue(attr);
4128 struct request_queue *q =
4129 container_of(kobj, struct request_queue, kobj);
4130 ssize_t res;
4131
4132 if (!entry->show)
4133 return -EIO;
4134 mutex_lock(&q->sysfs_lock);
4135 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
4136 mutex_unlock(&q->sysfs_lock);
4137 return -ENOENT;
4138 }
4139 res = entry->show(q, page);
4140 mutex_unlock(&q->sysfs_lock);
4141 return res;
4142}
4143
4144static ssize_t
4145queue_attr_store(struct kobject *kobj, struct attribute *attr,
4146 const char *page, size_t length)
4147{
4148 struct queue_sysfs_entry *entry = to_queue(attr);
4149 struct request_queue *q = container_of(kobj, struct request_queue, kobj);
4150
4151 ssize_t res;
4152
4153 if (!entry->store)
4154 return -EIO;
4155 mutex_lock(&q->sysfs_lock);
4156 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
4157 mutex_unlock(&q->sysfs_lock);
4158 return -ENOENT;
4159 }
4160 res = entry->store(q, page, length);
4161 mutex_unlock(&q->sysfs_lock);
4162 return res;
4163}
4164
4165static struct sysfs_ops queue_sysfs_ops = {
4166 .show = queue_attr_show,
4167 .store = queue_attr_store,
4168};
4169
4170static struct kobj_type queue_ktype = {
4171 .sysfs_ops = &queue_sysfs_ops,
4172 .default_attrs = default_attrs,
4173 .release = blk_release_queue,
4174};
4175
4176int blk_register_queue(struct gendisk *disk)
4177{
4178 int ret;
4179
4180 struct request_queue *q = disk->queue;
4181
4182 if (!q || !q->request_fn)
4183 return -ENXIO;
4184
4185 q->kobj.parent = kobject_get(&disk->kobj);
4186
4187 ret = kobject_add(&q->kobj);
4188 if (ret < 0)
4189 return ret;
4190
4191 kobject_uevent(&q->kobj, KOBJ_ADD);
4192
4193 ret = elv_register_queue(q);
4194 if (ret) {
4195 kobject_uevent(&q->kobj, KOBJ_REMOVE);
4196 kobject_del(&q->kobj);
4197 return ret;
4198 }
4199
4200 return 0;
4201}
4202
4203void blk_unregister_queue(struct gendisk *disk)
4204{
4205 struct request_queue *q = disk->queue;
4206
4207 if (q && q->request_fn) {
4208 elv_unregister_queue(q);
4209
4210 kobject_uevent(&q->kobj, KOBJ_REMOVE);
4211 kobject_del(&q->kobj);
4212 kobject_put(&disk->kobj);
4213 }
4214}
4215