1
2
3
4
5
6
7
8
9
10
11
12
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/backing-dev.h>
16#include <linux/bio.h>
17#include <linux/blkdev.h>
18#include <linux/highmem.h>
19#include <linux/mm.h>
20#include <linux/kernel_stat.h>
21#include <linux/string.h>
22#include <linux/init.h>
23#include <linux/bootmem.h>
24#include <linux/completion.h>
25#include <linux/slab.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/interrupt.h>
29#include <linux/cpu.h>
30#include <linux/blktrace_api.h>
31
32
33
34
35#include <scsi/scsi_cmnd.h>
36
/*
 * Forward declarations for file-local helpers that are referenced before
 * their definitions: blk_unplug_work() and blk_unplug_timeout() are wired
 * into each queue by blk_queue_make_request(), and init_request_from_bio()
 * is used by start_ordered() to set up the proxy barrier request.
 */
37static void blk_unplug_work(void *data);
38static void blk_unplug_timeout(unsigned long data);
39static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
40static void init_request_from_bio(struct request *req, struct bio *bio);
41static int __make_request(request_queue_t *q, struct bio *bio);
42
43
44
45
/* Slab cache pointer; presumably backs struct request allocations -- users are outside this chunk, TODO confirm. */
46static kmem_cache_t *request_cachep;
47
48
49
50
/* Slab cache pointer; presumably backs request queue allocations -- users are outside this chunk, TODO confirm. */
51static kmem_cache_t *requestq_cachep;
52
53
54
55
/* Slab cache pointer; presumably backs io_context allocations -- users are outside this chunk, TODO confirm. */
56static kmem_cache_t *iocontext_cachep;
57
/*
 * Wait queues indexed by data direction (the rw argument of
 * clear_queue_congested()); sleepers on an entry are woken when the
 * corresponding congested bit of a queue is cleared.
 */
58static wait_queue_head_t congestion_wqh[2] = {
59 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
60 __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
61 };
62
63
64
65
/* Workqueue handle; presumably the target of kblockd_schedule_work() (used by blk_unplug_timeout()) -- verify against its definition. */
66static struct workqueue_struct *kblockd_workqueue;
67
/*
 * Exported page-frame-number limits. blk_max_low_pfn is compared against a
 * queue's bounce limit in blk_queue_bounce_limit() on 32-bit builds; where
 * both values are initialised is not visible in this chunk.
 */
68unsigned long blk_max_low_pfn, blk_max_pfn;
69
70EXPORT_SYMBOL(blk_max_low_pfn);
71EXPORT_SYMBOL(blk_max_pfn);
72
/* Per-CPU list head; presumably collects completed requests for softirq processing -- users are outside this chunk, TODO confirm. */
73static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
74
75
/* A time span of HZ/50 jiffies (1/50th of a second); its users are outside this chunk. */
76#define BLK_BATCH_TIME (HZ/50UL)
77
78
/* Default value stored in q->nr_batching by blk_queue_make_request(). */
79#define BLK_BATCH_REQ 32
80
81
82
83
84
85
86static inline int queue_congestion_on_threshold(struct request_queue *q)
87{
88 return q->nr_congestion_on;
89}
90
91
92
93
94static inline int queue_congestion_off_threshold(struct request_queue *q)
95{
96 return q->nr_congestion_off;
97}
98
99static void blk_queue_congestion_threshold(struct request_queue *q)
100{
101 int nr;
102
103 nr = q->nr_requests - (q->nr_requests / 8) + 1;
104 if (nr > q->nr_requests)
105 nr = q->nr_requests;
106 q->nr_congestion_on = nr;
107
108 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
109 if (nr < 1)
110 nr = 1;
111 q->nr_congestion_off = nr;
112}
113
114
115
116
117
118
119static void clear_queue_congested(request_queue_t *q, int rw)
120{
121 enum bdi_state bit;
122 wait_queue_head_t *wqh = &congestion_wqh[rw];
123
124 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
125 clear_bit(bit, &q->backing_dev_info.state);
126 smp_mb__after_clear_bit();
127 if (waitqueue_active(wqh))
128 wake_up(wqh);
129}
130
131
132
133
134
135static void set_queue_congested(request_queue_t *q, int rw)
136{
137 enum bdi_state bit;
138
139 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
140 set_bit(bit, &q->backing_dev_info.state);
141}
142
143
144
145
146
147
148
149
150
151
152struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
153{
154 struct backing_dev_info *ret = NULL;
155 request_queue_t *q = bdev_get_queue(bdev);
156
157 if (q)
158 ret = &q->backing_dev_info;
159 return ret;
160}
161
162EXPORT_SYMBOL(blk_get_backing_dev_info);
163
164void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
165{
166 q->activity_fn = fn;
167 q->activity_data = data;
168}
169
170EXPORT_SYMBOL(blk_queue_activity_fn);
171
172
173
174
175
176
177
178
179
180
181
182
183void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
184{
185 q->prep_rq_fn = pfn;
186}
187
188EXPORT_SYMBOL(blk_queue_prep_rq);
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
207{
208 q->merge_bvec_fn = mbfn;
209}
210
211EXPORT_SYMBOL(blk_queue_merge_bvec);
212
213void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
214{
215 q->softirq_done_fn = fn;
216}
217
218EXPORT_SYMBOL(blk_queue_softirq_done);
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
243{
244
245
246
247 q->nr_requests = BLKDEV_MAX_RQ;
248 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
249 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
250 q->make_request_fn = mfn;
251 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
252 q->backing_dev_info.state = 0;
253 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
254 blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
255 blk_queue_hardsect_size(q, 512);
256 blk_queue_dma_alignment(q, 511);
257 blk_queue_congestion_threshold(q);
258 q->nr_batching = BLK_BATCH_REQ;
259
260 q->unplug_thresh = 4;
261 q->unplug_delay = (3 * HZ) / 1000;
262 if (q->unplug_delay == 0)
263 q->unplug_delay = 1;
264
265 INIT_WORK(&q->unplug_work, blk_unplug_work, q);
266
267 q->unplug_timer.function = blk_unplug_timeout;
268 q->unplug_timer.data = (unsigned long)q;
269
270
271
272
273 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
274
275 blk_queue_activity_fn(q, NULL, NULL);
276}
277
278EXPORT_SYMBOL(blk_queue_make_request);
279
280static inline void rq_init(request_queue_t *q, struct request *rq)
281{
282 INIT_LIST_HEAD(&rq->queuelist);
283 INIT_LIST_HEAD(&rq->donelist);
284
285 rq->errors = 0;
286 rq->rq_status = RQ_ACTIVE;
287 rq->bio = rq->biotail = NULL;
288 rq->ioprio = 0;
289 rq->buffer = NULL;
290 rq->ref_count = 1;
291 rq->q = q;
292 rq->waiting = NULL;
293 rq->special = NULL;
294 rq->data_len = 0;
295 rq->data = NULL;
296 rq->nr_phys_segments = 0;
297 rq->sense = NULL;
298 rq->end_io = NULL;
299 rq->end_io_data = NULL;
300 rq->completion_data = NULL;
301}
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316int blk_queue_ordered(request_queue_t *q, unsigned ordered,
317 prepare_flush_fn *prepare_flush_fn)
318{
319 if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
320 prepare_flush_fn == NULL) {
321 printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
322 return -EINVAL;
323 }
324
325 if (ordered != QUEUE_ORDERED_NONE &&
326 ordered != QUEUE_ORDERED_DRAIN &&
327 ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
328 ordered != QUEUE_ORDERED_DRAIN_FUA &&
329 ordered != QUEUE_ORDERED_TAG &&
330 ordered != QUEUE_ORDERED_TAG_FLUSH &&
331 ordered != QUEUE_ORDERED_TAG_FUA) {
332 printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
333 return -EINVAL;
334 }
335
336 q->ordered = ordered;
337 q->next_ordered = ordered;
338 q->prepare_flush_fn = prepare_flush_fn;
339
340 return 0;
341}
342
343EXPORT_SYMBOL(blk_queue_ordered);
344
345
346
347
348
349
350
351
352
353
354
355void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
356{
357 q->issue_flush_fn = iff;
358}
359
360EXPORT_SYMBOL(blk_queue_issue_flush_fn);
361
362
363
364
365inline unsigned blk_ordered_cur_seq(request_queue_t *q)
366{
367 if (!q->ordseq)
368 return 0;
369 return 1 << ffz(q->ordseq);
370}
371
372unsigned blk_ordered_req_seq(struct request *rq)
373{
374 request_queue_t *q = rq->q;
375
376 BUG_ON(q->ordseq == 0);
377
378 if (rq == &q->pre_flush_rq)
379 return QUEUE_ORDSEQ_PREFLUSH;
380 if (rq == &q->bar_rq)
381 return QUEUE_ORDSEQ_BAR;
382 if (rq == &q->post_flush_rq)
383 return QUEUE_ORDSEQ_POSTFLUSH;
384
385 if ((rq->flags & REQ_ORDERED_COLOR) ==
386 (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
387 return QUEUE_ORDSEQ_DRAIN;
388 else
389 return QUEUE_ORDSEQ_DONE;
390}
391
392void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
393{
394 struct request *rq;
395 int uptodate;
396
397 if (error && !q->orderr)
398 q->orderr = error;
399
400 BUG_ON(q->ordseq & seq);
401 q->ordseq |= seq;
402
403 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
404 return;
405
406
407
408
409 rq = q->orig_bar_rq;
410 uptodate = q->orderr ? q->orderr : 1;
411
412 q->ordseq = 0;
413
414 end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
415 end_that_request_last(rq, uptodate);
416}
417
418static void pre_flush_end_io(struct request *rq, int error)
419{
420 elv_completed_request(rq->q, rq);
421 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
422}
423
424static void bar_end_io(struct request *rq, int error)
425{
426 elv_completed_request(rq->q, rq);
427 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
428}
429
430static void post_flush_end_io(struct request *rq, int error)
431{
432 elv_completed_request(rq->q, rq);
433 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
434}
435
436static void queue_flush(request_queue_t *q, unsigned which)
437{
438 struct request *rq;
439 rq_end_io_fn *end_io;
440
441 if (which == QUEUE_ORDERED_PREFLUSH) {
442 rq = &q->pre_flush_rq;
443 end_io = pre_flush_end_io;
444 } else {
445 rq = &q->post_flush_rq;
446 end_io = post_flush_end_io;
447 }
448
449 rq_init(q, rq);
450 rq->flags = REQ_HARDBARRIER;
451 rq->elevator_private = NULL;
452 rq->rq_disk = q->bar_rq.rq_disk;
453 rq->rl = NULL;
454 rq->end_io = end_io;
455 q->prepare_flush_fn(q, rq);
456
457 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
458}
459
460static inline struct request *start_ordered(request_queue_t *q,
461 struct request *rq)
462{
463 q->bi_size = 0;
464 q->orderr = 0;
465 q->ordered = q->next_ordered;
466 q->ordseq |= QUEUE_ORDSEQ_STARTED;
467
468
469
470
471 blkdev_dequeue_request(rq);
472 q->orig_bar_rq = rq;
473 rq = &q->bar_rq;
474 rq_init(q, rq);
475 rq->flags = bio_data_dir(q->orig_bar_rq->bio);
476 rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
477 rq->elevator_private = NULL;
478 rq->rl = NULL;
479 init_request_from_bio(rq, q->orig_bar_rq->bio);
480 rq->end_io = bar_end_io;
481
482
483
484
485
486
487
488 if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
489 queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
490 else
491 q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
492
493 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
494
495 if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
496 queue_flush(q, QUEUE_ORDERED_PREFLUSH);
497 rq = &q->pre_flush_rq;
498 } else
499 q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
500
501 if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
502 q->ordseq |= QUEUE_ORDSEQ_DRAIN;
503 else
504 rq = NULL;
505
506 return rq;
507}
508
509int blk_do_ordered(request_queue_t *q, struct request **rqp)
510{
511 struct request *rq = *rqp;
512 int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
513
514 if (!q->ordseq) {
515 if (!is_barrier)
516 return 1;
517
518 if (q->next_ordered != QUEUE_ORDERED_NONE) {
519 *rqp = start_ordered(q, rq);
520 return 1;
521 } else {
522
523
524
525
526 blkdev_dequeue_request(rq);
527 end_that_request_first(rq, -EOPNOTSUPP,
528 rq->hard_nr_sectors);
529 end_that_request_last(rq, -EOPNOTSUPP);
530 *rqp = NULL;
531 return 0;
532 }
533 }
534
535
536
537
538
539
540 if (!blk_fs_request(rq) &&
541 rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
542 return 1;
543
544 if (q->ordered & QUEUE_ORDERED_TAG) {
545
546 if (is_barrier && rq != &q->bar_rq)
547 *rqp = NULL;
548 } else {
549
550 WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
551 if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
552 *rqp = NULL;
553 }
554
555 return 1;
556}
557
558static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
559{
560 request_queue_t *q = bio->bi_private;
561 struct bio_vec *bvec;
562 int i;
563
564
565
566
567
568
569 q->bi_size += bytes;
570
571 if (bio->bi_size)
572 return 1;
573
574
575 bio->bi_idx = 0;
576 bio_for_each_segment(bvec, bio, i) {
577 bvec->bv_len += bvec->bv_offset;
578 bvec->bv_offset = 0;
579 }
580
581
582 set_bit(BIO_UPTODATE, &bio->bi_flags);
583 bio->bi_size = q->bi_size;
584 bio->bi_sector -= (q->bi_size >> 9);
585 q->bi_size = 0;
586
587 return 0;
588}
589
590static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
591 unsigned int nbytes, int error)
592{
593 request_queue_t *q = rq->q;
594 bio_end_io_t *endio;
595 void *private;
596
597 if (&q->bar_rq != rq)
598 return 0;
599
600
601
602
603 if (error && !q->orderr)
604 q->orderr = error;
605
606 endio = bio->bi_end_io;
607 private = bio->bi_private;
608 bio->bi_end_io = flush_dry_bio_endio;
609 bio->bi_private = q;
610
611 bio_endio(bio, nbytes, error);
612
613 bio->bi_end_io = endio;
614 bio->bi_private = private;
615
616 return 1;
617}
618
619
620
621
622
623
624
625
626
627
628
629
630void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
631{
632 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
633 int dma = 0;
634
635 q->bounce_gfp = GFP_NOIO;
636#if BITS_PER_LONG == 64
637
638
639
640 if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
641 dma = 1;
642 q->bounce_pfn = max_low_pfn;
643#else
644 if (bounce_pfn < blk_max_low_pfn)
645 dma = 1;
646 q->bounce_pfn = bounce_pfn;
647#endif
648 if (dma) {
649 init_emergency_isa_pool();
650 q->bounce_gfp = GFP_NOIO | GFP_DMA;
651 q->bounce_pfn = bounce_pfn;
652 }
653}
654
655EXPORT_SYMBOL(blk_queue_bounce_limit);
656
657
658
659
660
661
662
663
664
665
666void blk_queue_max_sectors(request_queue_t *q, unsigned int max_sectors)
667{
668 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
669 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
670 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
671 }
672
673 if (BLK_DEF_MAX_SECTORS > max_sectors)
674 q->max_hw_sectors = q->max_sectors = max_sectors;
675 else {
676 q->max_sectors = BLK_DEF_MAX_SECTORS;
677 q->max_hw_sectors = max_sectors;
678 }
679}
680
681EXPORT_SYMBOL(blk_queue_max_sectors);
682
683
684
685
686
687
688
689
690
691
692
693void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
694{
695 if (!max_segments) {
696 max_segments = 1;
697 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
698 }
699
700 q->max_phys_segments = max_segments;
701}
702
703EXPORT_SYMBOL(blk_queue_max_phys_segments);
704
705
706
707
708
709
710
711
712
713
714
715
716void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
717{
718 if (!max_segments) {
719 max_segments = 1;
720 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
721 }
722
723 q->max_hw_segments = max_segments;
724}
725
726EXPORT_SYMBOL(blk_queue_max_hw_segments);
727
728
729
730
731
732
733
734
735
736
737void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
738{
739 if (max_size < PAGE_CACHE_SIZE) {
740 max_size = PAGE_CACHE_SIZE;
741 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
742 }
743
744 q->max_segment_size = max_size;
745}
746
747EXPORT_SYMBOL(blk_queue_max_segment_size);
748
749
750
751
752
753
754
755
756
757
758
759
760void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
761{
762 q->hardsect_size = size;
763}
764
765EXPORT_SYMBOL(blk_queue_hardsect_size);
766
767
768
769
/*
 * Smaller of two limits, where 0 means "no limit set": if one argument is
 * zero the other is returned, otherwise min() of both.
 *
 * Every argument and the whole expansion are parenthesized so the macro is
 * safe inside larger expressions and with operator arguments. Arguments may
 * still be evaluated more than once, so they must be free of side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
771
772
773
774
775
776
777void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
778{
779
780 t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
781 t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
782
783 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
784 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
785 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
786 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
787 if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
788 clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags);
789}
790
791EXPORT_SYMBOL(blk_queue_stack_limits);
792
793
794
795
796
797
798void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
799{
800 if (mask < PAGE_CACHE_SIZE - 1) {
801 mask = PAGE_CACHE_SIZE - 1;
802 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
803 }
804
805 q->seg_boundary_mask = mask;
806}
807
808EXPORT_SYMBOL(blk_queue_segment_boundary);
809
810
811
812
813
814
815
816
817
818
819
820void blk_queue_dma_alignment(request_queue_t *q, int mask)
821{
822 q->dma_alignment = mask;
823}
824
825EXPORT_SYMBOL(blk_queue_dma_alignment);
826
827
828
829
830
831
832
833
834
835
836
837
838struct request *blk_queue_find_tag(request_queue_t *q, int tag)
839{
840 struct blk_queue_tag *bqt = q->queue_tags;
841
842 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
843 return NULL;
844
845 return bqt->tag_index[tag];
846}
847
848EXPORT_SYMBOL(blk_queue_find_tag);
849
850
851
852
853
854
855
856
857
858static void __blk_queue_free_tags(request_queue_t *q)
859{
860 struct blk_queue_tag *bqt = q->queue_tags;
861
862 if (!bqt)
863 return;
864
865 if (atomic_dec_and_test(&bqt->refcnt)) {
866 BUG_ON(bqt->busy);
867 BUG_ON(!list_empty(&bqt->busy_list));
868
869 kfree(bqt->tag_index);
870 bqt->tag_index = NULL;
871
872 kfree(bqt->tag_map);
873 bqt->tag_map = NULL;
874
875 kfree(bqt);
876 }
877
878 q->queue_tags = NULL;
879 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
880}
881
882
883
884
885
886
887
888
889
890void blk_queue_free_tags(request_queue_t *q)
891{
892 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
893}
894
895EXPORT_SYMBOL(blk_queue_free_tags);
896
897static int
898init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
899{
900 struct request **tag_index;
901 unsigned long *tag_map;
902 int nr_ulongs;
903
904 if (depth > q->nr_requests * 2) {
905 depth = q->nr_requests * 2;
906 printk(KERN_ERR "%s: adjusted depth to %d\n",
907 __FUNCTION__, depth);
908 }
909
910 tag_index = kzalloc(depth * sizeof(struct request *), GFP_ATOMIC);
911 if (!tag_index)
912 goto fail;
913
914 nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
915 tag_map = kzalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
916 if (!tag_map)
917 goto fail;
918
919 tags->real_max_depth = depth;
920 tags->max_depth = depth;
921 tags->tag_index = tag_index;
922 tags->tag_map = tag_map;
923
924 return 0;
925fail:
926 kfree(tag_index);
927 return -ENOMEM;
928}
929
930
931
932
933
934
935
936int blk_queue_init_tags(request_queue_t *q, int depth,
937 struct blk_queue_tag *tags)
938{
939 int rc;
940
941 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
942
943 if (!tags && !q->queue_tags) {
944 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
945 if (!tags)
946 goto fail;
947
948 if (init_tag_map(q, tags, depth))
949 goto fail;
950
951 INIT_LIST_HEAD(&tags->busy_list);
952 tags->busy = 0;
953 atomic_set(&tags->refcnt, 1);
954 } else if (q->queue_tags) {
955 if ((rc = blk_queue_resize_tags(q, depth)))
956 return rc;
957 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
958 return 0;
959 } else
960 atomic_inc(&tags->refcnt);
961
962
963
964
965 q->queue_tags = tags;
966 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
967 return 0;
968fail:
969 kfree(tags);
970 return -ENOMEM;
971}
972
973EXPORT_SYMBOL(blk_queue_init_tags);
974
975
976
977
978
979
980
981
982
983int blk_queue_resize_tags(request_queue_t *q, int new_depth)
984{
985 struct blk_queue_tag *bqt = q->queue_tags;
986 struct request **tag_index;
987 unsigned long *tag_map;
988 int max_depth, nr_ulongs;
989
990 if (!bqt)
991 return -ENXIO;
992
993
994
995
996
997
998
999 if (new_depth <= bqt->real_max_depth) {
1000 bqt->max_depth = new_depth;
1001 return 0;
1002 }
1003
1004
1005
1006
1007 tag_index = bqt->tag_index;
1008 tag_map = bqt->tag_map;
1009 max_depth = bqt->real_max_depth;
1010
1011 if (init_tag_map(q, bqt, new_depth))
1012 return -ENOMEM;
1013
1014 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
1015 nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
1016 memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
1017
1018 kfree(tag_index);
1019 kfree(tag_map);
1020 return 0;
1021}
1022
1023EXPORT_SYMBOL(blk_queue_resize_tags);
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039void blk_queue_end_tag(request_queue_t *q, struct request *rq)
1040{
1041 struct blk_queue_tag *bqt = q->queue_tags;
1042 int tag = rq->tag;
1043
1044 BUG_ON(tag == -1);
1045
1046 if (unlikely(tag >= bqt->real_max_depth))
1047
1048
1049
1050
1051 return;
1052
1053 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
1054 printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
1055 __FUNCTION__, tag);
1056 return;
1057 }
1058
1059 list_del_init(&rq->queuelist);
1060 rq->flags &= ~REQ_QUEUED;
1061 rq->tag = -1;
1062
1063 if (unlikely(bqt->tag_index[tag] == NULL))
1064 printk(KERN_ERR "%s: tag %d is missing\n",
1065 __FUNCTION__, tag);
1066
1067 bqt->tag_index[tag] = NULL;
1068 bqt->busy--;
1069}
1070
1071EXPORT_SYMBOL(blk_queue_end_tag);
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091int blk_queue_start_tag(request_queue_t *q, struct request *rq)
1092{
1093 struct blk_queue_tag *bqt = q->queue_tags;
1094 int tag;
1095
1096 if (unlikely((rq->flags & REQ_QUEUED))) {
1097 printk(KERN_ERR
1098 "%s: request %p for device [%s] already tagged %d",
1099 __FUNCTION__, rq,
1100 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
1101 BUG();
1102 }
1103
1104 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
1105 if (tag >= bqt->max_depth)
1106 return 1;
1107
1108 __set_bit(tag, bqt->tag_map);
1109
1110 rq->flags |= REQ_QUEUED;
1111 rq->tag = tag;
1112 bqt->tag_index[tag] = rq;
1113 blkdev_dequeue_request(rq);
1114 list_add(&rq->queuelist, &bqt->busy_list);
1115 bqt->busy++;
1116 return 0;
1117}
1118
1119EXPORT_SYMBOL(blk_queue_start_tag);
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133void blk_queue_invalidate_tags(request_queue_t *q)
1134{
1135 struct blk_queue_tag *bqt = q->queue_tags;
1136 struct list_head *tmp, *n;
1137 struct request *rq;
1138
1139 list_for_each_safe(tmp, n, &bqt->busy_list) {
1140 rq = list_entry_rq(tmp);
1141
1142 if (rq->tag == -1) {
1143 printk(KERN_ERR
1144 "%s: bad tag found on list\n", __FUNCTION__);
1145 list_del_init(&rq->queuelist);
1146 rq->flags &= ~REQ_QUEUED;
1147 } else
1148 blk_queue_end_tag(q, rq);
1149
1150 rq->flags &= ~REQ_STARTED;
1151 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
1152 }
1153}
1154
1155EXPORT_SYMBOL(blk_queue_invalidate_tags);
1156
/*
 * Printable names of the request flag bits, indexed by bit number.
 * blk_dump_rq_flags() prints entry N when bit N of rq->flags is set, so the
 * order here has to follow the bit numbering of the flags.
 */
1157static const char * const rq_flags[] = {
1158 "REQ_RW",
1159 "REQ_FAILFAST",
1160 "REQ_SORTED",
1161 "REQ_SOFTBARRIER",
1162 "REQ_HARDBARRIER",
1163 "REQ_FUA",
1164 "REQ_CMD",
1165 "REQ_NOMERGE",
1166 "REQ_STARTED",
1167 "REQ_DONTPREP",
1168 "REQ_QUEUED",
1169 "REQ_ELVPRIV",
1170 "REQ_PC",
1171 "REQ_BLOCK_PC",
1172 "REQ_SENSE",
1173 "REQ_FAILED",
1174 "REQ_QUIET",
1175 "REQ_SPECIAL",
1176 "REQ_DRIVE_CMD",
1177 "REQ_DRIVE_TASK",
1178 "REQ_DRIVE_TASKFILE",
1179 "REQ_PREEMPT",
1180 "REQ_PM_SUSPEND",
1181 "REQ_PM_RESUME",
1182 "REQ_PM_SHUTDOWN",
1183 "REQ_ORDERED_COLOR",
1184};
1185
1186void blk_dump_rq_flags(struct request *rq, char *msg)
1187{
1188 int bit;
1189
1190 printk("%s: dev %s: flags = ", msg,
1191 rq->rq_disk ? rq->rq_disk->disk_name : "?");
1192 bit = 0;
1193 do {
1194 if (rq->flags & (1 << bit))
1195 printk("%s ", rq_flags[bit]);
1196 bit++;
1197 } while (bit < __REQ_NR_BITS);
1198
1199 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
1200 rq->nr_sectors,
1201 rq->current_nr_sectors);
1202 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
1203
1204 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
1205 printk("cdb: ");
1206 for (bit = 0; bit < sizeof(rq->cmd); bit++)
1207 printk("%02x ", rq->cmd[bit]);
1208 printk("\n");
1209 }
1210}
1211
1212EXPORT_SYMBOL(blk_dump_rq_flags);
1213
1214void blk_recount_segments(request_queue_t *q, struct bio *bio)
1215{
1216 struct bio_vec *bv, *bvprv = NULL;
1217 int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
1218 int high, highprv = 1;
1219
1220 if (unlikely(!bio->bi_io_vec))
1221 return;
1222
1223 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1224 hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
1225 bio_for_each_segment(bv, bio, i) {
1226
1227
1228
1229
1230
1231 high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
1232 if (high || highprv)
1233 goto new_hw_segment;
1234 if (cluster) {
1235 if (seg_size + bv->bv_len > q->max_segment_size)
1236 goto new_segment;
1237 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
1238 goto new_segment;
1239 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
1240 goto new_segment;
1241 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
1242 goto new_hw_segment;
1243
1244 seg_size += bv->bv_len;
1245 hw_seg_size += bv->bv_len;
1246 bvprv = bv;
1247 continue;
1248 }
1249new_segment:
1250 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
1251 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
1252 hw_seg_size += bv->bv_len;
1253 } else {
1254new_hw_segment:
1255 if (hw_seg_size > bio->bi_hw_front_size)
1256 bio->bi_hw_front_size = hw_seg_size;
1257 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
1258 nr_hw_segs++;
1259 }
1260
1261 nr_phys_segs++;
1262 bvprv = bv;
1263 seg_size = bv->bv_len;
1264 highprv = high;
1265 }
1266 if (hw_seg_size > bio->bi_hw_back_size)
1267 bio->bi_hw_back_size = hw_seg_size;
1268 if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
1269 bio->bi_hw_front_size = hw_seg_size;
1270 bio->bi_phys_segments = nr_phys_segs;
1271 bio->bi_hw_segments = nr_hw_segs;
1272 bio->bi_flags |= (1 << BIO_SEG_VALID);
1273}
1274
1275
1276static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
1277 struct bio *nxt)
1278{
1279 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
1280 return 0;
1281
1282 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
1283 return 0;
1284 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1285 return 0;
1286
1287
1288
1289
1290
1291 if (BIO_SEG_BOUNDARY(q, bio, nxt))
1292 return 1;
1293
1294 return 0;
1295}
1296
1297static int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
1298 struct bio *nxt)
1299{
1300 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1301 blk_recount_segments(q, bio);
1302 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
1303 blk_recount_segments(q, nxt);
1304 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
1305 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size))
1306 return 0;
1307 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
1308 return 0;
1309
1310 return 1;
1311}
1312
1313
1314
1315
1316
1317int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
1318{
1319 struct bio_vec *bvec, *bvprv;
1320 struct bio *bio;
1321 int nsegs, i, cluster;
1322
1323 nsegs = 0;
1324 cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
1325
1326
1327
1328
1329 bvprv = NULL;
1330 rq_for_each_bio(bio, rq) {
1331
1332
1333
1334 bio_for_each_segment(bvec, bio, i) {
1335 int nbytes = bvec->bv_len;
1336
1337 if (bvprv && cluster) {
1338 if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
1339 goto new_segment;
1340
1341 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
1342 goto new_segment;
1343 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
1344 goto new_segment;
1345
1346 sg[nsegs - 1].length += nbytes;
1347 } else {
1348new_segment:
1349 memset(&sg[nsegs],0,sizeof(struct scatterlist));
1350 sg[nsegs].page = bvec->bv_page;
1351 sg[nsegs].length = nbytes;
1352 sg[nsegs].offset = bvec->bv_offset;
1353
1354 nsegs++;
1355 }
1356 bvprv = bvec;
1357 }
1358 }
1359
1360 return nsegs;
1361}
1362
1363EXPORT_SYMBOL(blk_rq_map_sg);
1364
1365
1366
1367
1368
1369
1370static inline int ll_new_mergeable(request_queue_t *q,
1371 struct request *req,
1372 struct bio *bio)
1373{
1374 int nr_phys_segs = bio_phys_segments(q, bio);
1375
1376 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1377 req->flags |= REQ_NOMERGE;
1378 if (req == q->last_merge)
1379 q->last_merge = NULL;
1380 return 0;
1381 }
1382
1383
1384
1385
1386
1387 req->nr_phys_segments += nr_phys_segs;
1388 return 1;
1389}
1390
1391static inline int ll_new_hw_segment(request_queue_t *q,
1392 struct request *req,
1393 struct bio *bio)
1394{
1395 int nr_hw_segs = bio_hw_segments(q, bio);
1396 int nr_phys_segs = bio_phys_segments(q, bio);
1397
1398 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1399 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1400 req->flags |= REQ_NOMERGE;
1401 if (req == q->last_merge)
1402 q->last_merge = NULL;
1403 return 0;
1404 }
1405
1406
1407
1408
1409
1410 req->nr_hw_segments += nr_hw_segs;
1411 req->nr_phys_segments += nr_phys_segs;
1412 return 1;
1413}
1414
1415static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1416 struct bio *bio)
1417{
1418 unsigned short max_sectors;
1419 int len;
1420
1421 if (unlikely(blk_pc_request(req)))
1422 max_sectors = q->max_hw_sectors;
1423 else
1424 max_sectors = q->max_sectors;
1425
1426 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1427 req->flags |= REQ_NOMERGE;
1428 if (req == q->last_merge)
1429 q->last_merge = NULL;
1430 return 0;
1431 }
1432 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1433 blk_recount_segments(q, req->biotail);
1434 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1435 blk_recount_segments(q, bio);
1436 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1437 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1438 !BIOVEC_VIRT_OVERSIZE(len)) {
1439 int mergeable = ll_new_mergeable(q, req, bio);
1440
1441 if (mergeable) {
1442 if (req->nr_hw_segments == 1)
1443 req->bio->bi_hw_front_size = len;
1444 if (bio->bi_hw_segments == 1)
1445 bio->bi_hw_back_size = len;
1446 }
1447 return mergeable;
1448 }
1449
1450 return ll_new_hw_segment(q, req, bio);
1451}
1452
1453static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1454 struct bio *bio)
1455{
1456 unsigned short max_sectors;
1457 int len;
1458
1459 if (unlikely(blk_pc_request(req)))
1460 max_sectors = q->max_hw_sectors;
1461 else
1462 max_sectors = q->max_sectors;
1463
1464
1465 if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
1466 req->flags |= REQ_NOMERGE;
1467 if (req == q->last_merge)
1468 q->last_merge = NULL;
1469 return 0;
1470 }
1471 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1472 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1473 blk_recount_segments(q, bio);
1474 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1475 blk_recount_segments(q, req->bio);
1476 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1477 !BIOVEC_VIRT_OVERSIZE(len)) {
1478 int mergeable = ll_new_mergeable(q, req, bio);
1479
1480 if (mergeable) {
1481 if (bio->bi_hw_segments == 1)
1482 bio->bi_hw_front_size = len;
1483 if (req->nr_hw_segments == 1)
1484 req->biotail->bi_hw_back_size = len;
1485 }
1486 return mergeable;
1487 }
1488
1489 return ll_new_hw_segment(q, req, bio);
1490}
1491
1492static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
1493 struct request *next)
1494{
1495 int total_phys_segments;
1496 int total_hw_segments;
1497
1498
1499
1500
1501
1502 if (req->special || next->special)
1503 return 0;
1504
1505
1506
1507
1508 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1509 return 0;
1510
1511 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1512 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1513 total_phys_segments--;
1514
1515 if (total_phys_segments > q->max_phys_segments)
1516 return 0;
1517
1518 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1519 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1520 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1521
1522
1523
1524 if (req->nr_hw_segments == 1)
1525 req->bio->bi_hw_front_size = len;
1526 if (next->nr_hw_segments == 1)
1527 next->biotail->bi_hw_back_size = len;
1528 total_hw_segments--;
1529 }
1530
1531 if (total_hw_segments > q->max_hw_segments)
1532 return 0;
1533
1534
1535 req->nr_phys_segments = total_phys_segments;
1536 req->nr_hw_segments = total_hw_segments;
1537 return 1;
1538}
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548void blk_plug_device(request_queue_t *q)
1549{
1550 WARN_ON(!irqs_disabled());
1551
1552
1553
1554
1555
1556 if (blk_queue_stopped(q))
1557 return;
1558
1559 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
1560 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1561 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
1562 }
1563}
1564
1565EXPORT_SYMBOL(blk_plug_device);
1566
1567
1568
1569
1570
1571int blk_remove_plug(request_queue_t *q)
1572{
1573 WARN_ON(!irqs_disabled());
1574
1575 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1576 return 0;
1577
1578 del_timer(&q->unplug_timer);
1579 return 1;
1580}
1581
1582EXPORT_SYMBOL(blk_remove_plug);
1583
1584
1585
1586
1587void __generic_unplug_device(request_queue_t *q)
1588{
1589 if (unlikely(blk_queue_stopped(q)))
1590 return;
1591
1592 if (!blk_remove_plug(q))
1593 return;
1594
1595 q->request_fn(q);
1596}
1597EXPORT_SYMBOL(__generic_unplug_device);
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610void generic_unplug_device(request_queue_t *q)
1611{
1612 spin_lock_irq(q->queue_lock);
1613 __generic_unplug_device(q);
1614 spin_unlock_irq(q->queue_lock);
1615}
1616EXPORT_SYMBOL(generic_unplug_device);
1617
1618static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1619 struct page *page)
1620{
1621 request_queue_t *q = bdi->unplug_io_data;
1622
1623
1624
1625
1626 if (q->unplug_fn) {
1627 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1628 q->rq.count[READ] + q->rq.count[WRITE]);
1629
1630 q->unplug_fn(q);
1631 }
1632}
1633
1634static void blk_unplug_work(void *data)
1635{
1636 request_queue_t *q = data;
1637
1638 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL,
1639 q->rq.count[READ] + q->rq.count[WRITE]);
1640
1641 q->unplug_fn(q);
1642}
1643
1644static void blk_unplug_timeout(unsigned long data)
1645{
1646 request_queue_t *q = (request_queue_t *)data;
1647
1648 blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
1649 q->rq.count[READ] + q->rq.count[WRITE]);
1650
1651 kblockd_schedule_work(&q->unplug_work);
1652}
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663void blk_start_queue(request_queue_t *q)
1664{
1665 WARN_ON(!irqs_disabled());
1666
1667 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1668
1669
1670
1671
1672
1673 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1674 q->request_fn(q);
1675 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1676 } else {
1677 blk_plug_device(q);
1678 kblockd_schedule_work(&q->unplug_work);
1679 }
1680}
1681
1682EXPORT_SYMBOL(blk_start_queue);
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698void blk_stop_queue(request_queue_t *q)
1699{
1700 blk_remove_plug(q);
1701 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1702}
1703EXPORT_SYMBOL(blk_stop_queue);
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719void blk_sync_queue(struct request_queue *q)
1720{
1721 del_timer_sync(&q->unplug_timer);
1722 kblockd_flush();
1723}
1724EXPORT_SYMBOL(blk_sync_queue);
1725
1726
1727
1728
1729
1730void blk_run_queue(struct request_queue *q)
1731{
1732 unsigned long flags;
1733
1734 spin_lock_irqsave(q->queue_lock, flags);
1735 blk_remove_plug(q);
1736
1737
1738
1739
1740
1741 if (!elv_queue_empty(q)) {
1742 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1743 q->request_fn(q);
1744 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1745 } else {
1746 blk_plug_device(q);
1747 kblockd_schedule_work(&q->unplug_work);
1748 }
1749 }
1750
1751 spin_unlock_irqrestore(q->queue_lock, flags);
1752}
1753EXPORT_SYMBOL(blk_run_queue);
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770static void blk_release_queue(struct kobject *kobj)
1771{
1772 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
1773 struct request_list *rl = &q->rq;
1774
1775 blk_sync_queue(q);
1776
1777 if (rl->rq_pool)
1778 mempool_destroy(rl->rq_pool);
1779
1780 if (q->queue_tags)
1781 __blk_queue_free_tags(q);
1782
1783 if (q->blk_trace)
1784 blk_trace_shutdown(q);
1785
1786 kmem_cache_free(requestq_cachep, q);
1787}
1788
1789void blk_put_queue(request_queue_t *q)
1790{
1791 kobject_put(&q->kobj);
1792}
1793EXPORT_SYMBOL(blk_put_queue);
1794
1795void blk_cleanup_queue(request_queue_t * q)
1796{
1797 mutex_lock(&q->sysfs_lock);
1798 set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
1799 mutex_unlock(&q->sysfs_lock);
1800
1801 if (q->elevator)
1802 elevator_exit(q->elevator);
1803
1804 blk_put_queue(q);
1805}
1806
1807EXPORT_SYMBOL(blk_cleanup_queue);
1808
1809static int blk_init_free_list(request_queue_t *q)
1810{
1811 struct request_list *rl = &q->rq;
1812
1813 rl->count[READ] = rl->count[WRITE] = 0;
1814 rl->starved[READ] = rl->starved[WRITE] = 0;
1815 rl->elvpriv = 0;
1816 init_waitqueue_head(&rl->wait[READ]);
1817 init_waitqueue_head(&rl->wait[WRITE]);
1818
1819 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
1820 mempool_free_slab, request_cachep, q->node);
1821
1822 if (!rl->rq_pool)
1823 return -ENOMEM;
1824
1825 return 0;
1826}
1827
1828request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
1829{
1830 return blk_alloc_queue_node(gfp_mask, -1);
1831}
1832EXPORT_SYMBOL(blk_alloc_queue);
1833
1834static struct kobj_type queue_ktype;
1835
1836request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
1837{
1838 request_queue_t *q;
1839
1840 q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id);
1841 if (!q)
1842 return NULL;
1843
1844 memset(q, 0, sizeof(*q));
1845 init_timer(&q->unplug_timer);
1846
1847 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
1848 q->kobj.ktype = &queue_ktype;
1849 kobject_init(&q->kobj);
1850
1851 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1852 q->backing_dev_info.unplug_io_data = q;
1853
1854 mutex_init(&q->sysfs_lock);
1855
1856 return q;
1857}
1858EXPORT_SYMBOL(blk_alloc_queue_node);
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1894{
1895 return blk_init_queue_node(rfn, lock, -1);
1896}
1897EXPORT_SYMBOL(blk_init_queue);
1898
1899request_queue_t *
1900blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
1901{
1902 request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id);
1903
1904 if (!q)
1905 return NULL;
1906
1907 q->node = node_id;
1908 if (blk_init_free_list(q)) {
1909 kmem_cache_free(requestq_cachep, q);
1910 return NULL;
1911 }
1912
1913
1914
1915
1916
1917 if (!lock) {
1918 spin_lock_init(&q->__queue_lock);
1919 lock = &q->__queue_lock;
1920 }
1921
1922 q->request_fn = rfn;
1923 q->back_merge_fn = ll_back_merge_fn;
1924 q->front_merge_fn = ll_front_merge_fn;
1925 q->merge_requests_fn = ll_merge_requests_fn;
1926 q->prep_rq_fn = NULL;
1927 q->unplug_fn = generic_unplug_device;
1928 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1929 q->queue_lock = lock;
1930
1931 blk_queue_segment_boundary(q, 0xffffffff);
1932
1933 blk_queue_make_request(q, __make_request);
1934 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1935
1936 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1937 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1938
1939
1940
1941
1942 if (!elevator_init(q, NULL)) {
1943 blk_queue_congestion_threshold(q);
1944 return q;
1945 }
1946
1947 blk_put_queue(q);
1948 return NULL;
1949}
1950EXPORT_SYMBOL(blk_init_queue_node);
1951
1952int blk_get_queue(request_queue_t *q)
1953{
1954 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
1955 kobject_get(&q->kobj);
1956 return 0;
1957 }
1958
1959 return 1;
1960}
1961
1962EXPORT_SYMBOL(blk_get_queue);
1963
1964static inline void blk_free_request(request_queue_t *q, struct request *rq)
1965{
1966 if (rq->flags & REQ_ELVPRIV)
1967 elv_put_request(q, rq);
1968 mempool_free(rq, q->rq.rq_pool);
1969}
1970
1971static inline struct request *
1972blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
1973 int priv, gfp_t gfp_mask)
1974{
1975 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1976
1977 if (!rq)
1978 return NULL;
1979
1980
1981
1982
1983
1984 rq->flags = rw;
1985
1986 if (priv) {
1987 if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
1988 mempool_free(rq, q->rq.rq_pool);
1989 return NULL;
1990 }
1991 rq->flags |= REQ_ELVPRIV;
1992 }
1993
1994 return rq;
1995}
1996
1997
1998
1999
2000
2001static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
2002{
2003 if (!ioc)
2004 return 0;
2005
2006
2007
2008
2009
2010
2011 return ioc->nr_batch_requests == q->nr_batching ||
2012 (ioc->nr_batch_requests > 0
2013 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
2014}
2015
2016
2017
2018
2019
2020
2021
2022static void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
2023{
2024 if (!ioc || ioc_batching(q, ioc))
2025 return;
2026
2027 ioc->nr_batch_requests = q->nr_batching;
2028 ioc->last_waited = jiffies;
2029}
2030
2031static void __freed_request(request_queue_t *q, int rw)
2032{
2033 struct request_list *rl = &q->rq;
2034
2035 if (rl->count[rw] < queue_congestion_off_threshold(q))
2036 clear_queue_congested(q, rw);
2037
2038 if (rl->count[rw] + 1 <= q->nr_requests) {
2039 if (waitqueue_active(&rl->wait[rw]))
2040 wake_up(&rl->wait[rw]);
2041
2042 blk_clear_queue_full(q, rw);
2043 }
2044}
2045
2046
2047
2048
2049
2050static void freed_request(request_queue_t *q, int rw, int priv)
2051{
2052 struct request_list *rl = &q->rq;
2053
2054 rl->count[rw]--;
2055 if (priv)
2056 rl->elvpriv--;
2057
2058 __freed_request(q, rw);
2059
2060 if (unlikely(rl->starved[rw ^ 1]))
2061 __freed_request(q, rw ^ 1);
2062}
2063
/* Map a list head to the struct request linked at (list)->next via ->queuelist. */
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
2065
2066
2067
2068
2069
/*
 * Try to allocate a free request for direction @rw without sleeping on the
 * request wait queue.
 * @q:        the queue
 * @rw:       READ or WRITE; indexes the per-direction counters
 * @bio:      bio the request is for (may be NULL); passed to the elevator
 * @gfp_mask: allocation flags for the request itself
 *
 * Locking, as visible in this function: it is entered with q->queue_lock
 * held (it calls spin_unlock_irq() before allocating).  On success it
 * returns the request with the lock RELEASED; on every failure path it
 * returns NULL with the lock HELD.
 */
static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
				   gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	int may_queue, priv;

	/* Ask the elevator first; a flat "no" goes straight to starvation handling. */
	may_queue = elv_may_queue(q, rw, bio);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[rw]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC);
			/*
			 * The task that first fills the queue becomes a
			 * batcher and marks the queue full.  Once it is full,
			 * only batchers and elevator "must queue" requests
			 * may allocate; everyone else is refused.
			 */
			if (!blk_queue_full(q, rw)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, rw);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/* Refused: return NULL with the lock still held. */
					goto out;
				}
			}
		}
		set_queue_congested(q, rw);
	}

	/*
	 * Hard cap: even batchers may not push the allocation count to
	 * 1.5 * nr_requests or beyond.
	 */
	if (rl->count[rw] >= (3 * q->nr_requests / 2))
		goto out;

	/* Reserve the slot before dropping the lock. */
	rl->count[rw]++;
	rl->starved[rw] = 0;

	/* No elevator-private data while an elevator switch is in progress. */
	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
	if (priv)
		rl->elvpriv++;

	/* The allocation runs without the queue lock. */
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed: re-take the lock and undo the
		 * reservation made above (freed_request() also wakes waiters
		 * where appropriate).
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, rw, priv);

		/*
		 * Also entered directly from the elevator refusal at the top.
		 * With no requests of this direction allocated, nothing would
		 * ever call freed_request() for it, so flag the direction as
		 * starved; freed_request() checks that flag when the other
		 * direction frees a request.
		 */
rq_starved:
		if (unlikely(rl->count[rw] == 0))
			rl->starved[rw] = 1;

		goto out;
	}

	/*
	 * ioc is only non-NULL when the queue was at its limit above;
	 * ioc_batching() returns 0 for a NULL ioc, so the dereference is
	 * guarded.  Each successful allocation uses up one batch credit.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	rq_init(q, rq);
	rq->rl = rl;

	blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
out:
	return rq;
}
2168
2169
2170
2171
2172
2173
2174
2175static struct request *get_request_wait(request_queue_t *q, int rw,
2176 struct bio *bio)
2177{
2178 struct request *rq;
2179
2180 rq = get_request(q, rw, bio, GFP_NOIO);
2181 while (!rq) {
2182 DEFINE_WAIT(wait);
2183 struct request_list *rl = &q->rq;
2184
2185 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
2186 TASK_UNINTERRUPTIBLE);
2187
2188 rq = get_request(q, rw, bio, GFP_NOIO);
2189
2190 if (!rq) {
2191 struct io_context *ioc;
2192
2193 blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
2194
2195 __generic_unplug_device(q);
2196 spin_unlock_irq(q->queue_lock);
2197 io_schedule();
2198
2199
2200
2201
2202
2203
2204
2205 ioc = current_io_context(GFP_NOIO);
2206 ioc_set_batching(q, ioc);
2207
2208 spin_lock_irq(q->queue_lock);
2209 }
2210 finish_wait(&rl->wait[rw], &wait);
2211 }
2212
2213 return rq;
2214}
2215
2216struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2217{
2218 struct request *rq;
2219
2220 BUG_ON(rw != READ && rw != WRITE);
2221
2222 spin_lock_irq(q->queue_lock);
2223 if (gfp_mask & __GFP_WAIT) {
2224 rq = get_request_wait(q, rw, NULL);
2225 } else {
2226 rq = get_request(q, rw, NULL, gfp_mask);
2227 if (!rq)
2228 spin_unlock_irq(q->queue_lock);
2229 }
2230
2231
2232 return rq;
2233}
2234EXPORT_SYMBOL(blk_get_request);
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246void blk_requeue_request(request_queue_t *q, struct request *rq)
2247{
2248 blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
2249
2250 if (blk_rq_tagged(rq))
2251 blk_queue_end_tag(q, rq);
2252
2253 elv_requeue_request(q, rq);
2254}
2255
2256EXPORT_SYMBOL(blk_requeue_request);
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277void blk_insert_request(request_queue_t *q, struct request *rq,
2278 int at_head, void *data)
2279{
2280 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2281 unsigned long flags;
2282
2283
2284
2285
2286
2287
2288 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
2289
2290 rq->special = data;
2291
2292 spin_lock_irqsave(q->queue_lock, flags);
2293
2294
2295
2296
2297 if (blk_rq_tagged(rq))
2298 blk_queue_end_tag(q, rq);
2299
2300 drive_stat_acct(rq, rq->nr_sectors, 1);
2301 __elv_add_request(q, rq, where, 0);
2302
2303 if (blk_queue_plugged(q))
2304 __generic_unplug_device(q);
2305 else
2306 q->request_fn(q);
2307 spin_unlock_irqrestore(q->queue_lock, flags);
2308}
2309
2310EXPORT_SYMBOL(blk_insert_request);
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
2333 unsigned int len)
2334{
2335 unsigned long uaddr;
2336 struct bio *bio;
2337 int reading;
2338
2339 if (len > (q->max_hw_sectors << 9))
2340 return -EINVAL;
2341 if (!len || !ubuf)
2342 return -EINVAL;
2343
2344 reading = rq_data_dir(rq) == READ;
2345
2346
2347
2348
2349
2350 uaddr = (unsigned long) ubuf;
2351 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
2352 bio = bio_map_user(q, NULL, uaddr, len, reading);
2353 else
2354 bio = bio_copy_user(q, uaddr, len, reading);
2355
2356 if (!IS_ERR(bio)) {
2357 rq->bio = rq->biotail = bio;
2358 blk_rq_bio_prep(q, rq, bio);
2359
2360 rq->buffer = rq->data = NULL;
2361 rq->data_len = len;
2362 return 0;
2363 }
2364
2365
2366
2367
2368 return PTR_ERR(bio);
2369}
2370
2371EXPORT_SYMBOL(blk_rq_map_user);
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393int blk_rq_map_user_iov(request_queue_t *q, struct request *rq,
2394 struct sg_iovec *iov, int iov_count)
2395{
2396 struct bio *bio;
2397
2398 if (!iov || iov_count <= 0)
2399 return -EINVAL;
2400
2401
2402
2403
2404 bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ);
2405 if (IS_ERR(bio))
2406 return PTR_ERR(bio);
2407
2408 rq->bio = rq->biotail = bio;
2409 blk_rq_bio_prep(q, rq, bio);
2410 rq->buffer = rq->data = NULL;
2411 rq->data_len = bio->bi_size;
2412 return 0;
2413}
2414
2415EXPORT_SYMBOL(blk_rq_map_user_iov);
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425int blk_rq_unmap_user(struct bio *bio, unsigned int ulen)
2426{
2427 int ret = 0;
2428
2429 if (bio) {
2430 if (bio_flagged(bio, BIO_USER_MAPPED))
2431 bio_unmap_user(bio);
2432 else
2433 ret = bio_uncopy_user(bio);
2434 }
2435
2436 return 0;
2437}
2438
2439EXPORT_SYMBOL(blk_rq_unmap_user);
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
2450 unsigned int len, gfp_t gfp_mask)
2451{
2452 struct bio *bio;
2453
2454 if (len > (q->max_hw_sectors << 9))
2455 return -EINVAL;
2456 if (!len || !kbuf)
2457 return -EINVAL;
2458
2459 bio = bio_map_kern(q, kbuf, len, gfp_mask);
2460 if (IS_ERR(bio))
2461 return PTR_ERR(bio);
2462
2463 if (rq_data_dir(rq) == WRITE)
2464 bio->bi_rw |= (1 << BIO_RW);
2465
2466 rq->bio = rq->biotail = bio;
2467 blk_rq_bio_prep(q, rq, bio);
2468
2469 rq->buffer = rq->data = NULL;
2470 rq->data_len = len;
2471 return 0;
2472}
2473
2474EXPORT_SYMBOL(blk_rq_map_kern);
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
2489 struct request *rq, int at_head,
2490 rq_end_io_fn *done)
2491{
2492 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
2493
2494 rq->rq_disk = bd_disk;
2495 rq->flags |= REQ_NOMERGE;
2496 rq->end_io = done;
2497 WARN_ON(irqs_disabled());
2498 spin_lock_irq(q->queue_lock);
2499 __elv_add_request(q, rq, where, 1);
2500 __generic_unplug_device(q);
2501 spin_unlock_irq(q->queue_lock);
2502}
2503EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2517 struct request *rq, int at_head)
2518{
2519 DECLARE_COMPLETION_ONSTACK(wait);
2520 char sense[SCSI_SENSE_BUFFERSIZE];
2521 int err = 0;
2522
2523
2524
2525
2526
2527 rq->ref_count++;
2528
2529 if (!rq->sense) {
2530 memset(sense, 0, sizeof(sense));
2531 rq->sense = sense;
2532 rq->sense_len = 0;
2533 }
2534
2535 rq->waiting = &wait;
2536 blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
2537 wait_for_completion(&wait);
2538 rq->waiting = NULL;
2539
2540 if (rq->errors)
2541 err = -EIO;
2542
2543 return err;
2544}
2545
2546EXPORT_SYMBOL(blk_execute_rq);
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2559{
2560 request_queue_t *q;
2561
2562 if (bdev->bd_disk == NULL)
2563 return -ENXIO;
2564
2565 q = bdev_get_queue(bdev);
2566 if (!q)
2567 return -ENXIO;
2568 if (!q->issue_flush_fn)
2569 return -EOPNOTSUPP;
2570
2571 return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
2572}
2573
2574EXPORT_SYMBOL(blkdev_issue_flush);
2575
2576static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2577{
2578 int rw = rq_data_dir(rq);
2579
2580 if (!blk_fs_request(rq) || !rq->rq_disk)
2581 return;
2582
2583 if (!new_io) {
2584 __disk_stat_inc(rq->rq_disk, merges[rw]);
2585 } else {
2586 disk_round_stats(rq->rq_disk);
2587 rq->rq_disk->in_flight++;
2588 }
2589}
2590
2591
2592
2593
2594
2595
2596static inline void add_request(request_queue_t * q, struct request * req)
2597{
2598 drive_stat_acct(req, req->nr_sectors, 1);
2599
2600 if (q->activity_fn)
2601 q->activity_fn(q->activity_data, rq_data_dir(req));
2602
2603
2604
2605
2606
2607 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2608}
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625void disk_round_stats(struct gendisk *disk)
2626{
2627 unsigned long now = jiffies;
2628
2629 if (now == disk->stamp)
2630 return;
2631
2632 if (disk->in_flight) {
2633 __disk_stat_add(disk, time_in_queue,
2634 disk->in_flight * (now - disk->stamp));
2635 __disk_stat_add(disk, io_ticks, (now - disk->stamp));
2636 }
2637 disk->stamp = now;
2638}
2639
2640EXPORT_SYMBOL_GPL(disk_round_stats);
2641
2642
2643
2644
2645void __blk_put_request(request_queue_t *q, struct request *req)
2646{
2647 struct request_list *rl = req->rl;
2648
2649 if (unlikely(!q))
2650 return;
2651 if (unlikely(--req->ref_count))
2652 return;
2653
2654 elv_completed_request(q, req);
2655
2656 req->rq_status = RQ_INACTIVE;
2657 req->rl = NULL;
2658
2659
2660
2661
2662
2663 if (rl) {
2664 int rw = rq_data_dir(req);
2665 int priv = req->flags & REQ_ELVPRIV;
2666
2667 BUG_ON(!list_empty(&req->queuelist));
2668
2669 blk_free_request(q, req);
2670 freed_request(q, rw, priv);
2671 }
2672}
2673
2674EXPORT_SYMBOL_GPL(__blk_put_request);
2675
2676void blk_put_request(struct request *req)
2677{
2678 unsigned long flags;
2679 request_queue_t *q = req->q;
2680
2681
2682
2683
2684
2685 if (q) {
2686 spin_lock_irqsave(q->queue_lock, flags);
2687 __blk_put_request(q, req);
2688 spin_unlock_irqrestore(q->queue_lock, flags);
2689 }
2690}
2691
2692EXPORT_SYMBOL(blk_put_request);
2693
2694
2695
2696
2697
2698
2699void blk_end_sync_rq(struct request *rq, int error)
2700{
2701 struct completion *waiting = rq->waiting;
2702
2703 rq->waiting = NULL;
2704 __blk_put_request(rq->q, rq);
2705
2706
2707
2708
2709
2710 complete(waiting);
2711}
2712EXPORT_SYMBOL(blk_end_sync_rq);
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723long blk_congestion_wait(int rw, long timeout)
2724{
2725 long ret;
2726 DEFINE_WAIT(wait);
2727 wait_queue_head_t *wqh = &congestion_wqh[rw];
2728
2729 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
2730 ret = io_schedule_timeout(timeout);
2731 finish_wait(wqh, &wait);
2732 return ret;
2733}
2734
2735EXPORT_SYMBOL(blk_congestion_wait);
2736
2737
2738
2739
2740static int attempt_merge(request_queue_t *q, struct request *req,
2741 struct request *next)
2742{
2743 if (!rq_mergeable(req) || !rq_mergeable(next))
2744 return 0;
2745
2746
2747
2748
2749 if (req->sector + req->nr_sectors != next->sector)
2750 return 0;
2751
2752 if (rq_data_dir(req) != rq_data_dir(next)
2753 || req->rq_disk != next->rq_disk
2754 || next->waiting || next->special)
2755 return 0;
2756
2757
2758
2759
2760
2761
2762
2763 if (!q->merge_requests_fn(q, req, next))
2764 return 0;
2765
2766
2767
2768
2769
2770
2771
2772 if (time_after(req->start_time, next->start_time))
2773 req->start_time = next->start_time;
2774
2775 req->biotail->bi_next = next->bio;
2776 req->biotail = next->biotail;
2777
2778 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2779
2780 elv_merge_requests(q, req, next);
2781
2782 if (req->rq_disk) {
2783 disk_round_stats(req->rq_disk);
2784 req->rq_disk->in_flight--;
2785 }
2786
2787 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
2788
2789 __blk_put_request(q, next);
2790 return 1;
2791}
2792
2793static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
2794{
2795 struct request *next = elv_latter_request(q, rq);
2796
2797 if (next)
2798 return attempt_merge(q, rq, next);
2799
2800 return 0;
2801}
2802
2803static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2804{
2805 struct request *prev = elv_former_request(q, rq);
2806
2807 if (prev)
2808 return attempt_merge(q, prev, rq);
2809
2810 return 0;
2811}
2812
2813static void init_request_from_bio(struct request *req, struct bio *bio)
2814{
2815 req->flags |= REQ_CMD;
2816
2817
2818
2819
2820 if (bio_rw_ahead(bio) || bio_failfast(bio))
2821 req->flags |= REQ_FAILFAST;
2822
2823
2824
2825
2826 if (unlikely(bio_barrier(bio)))
2827 req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
2828
2829 if (bio_sync(bio))
2830 req->flags |= REQ_RW_SYNC;
2831
2832 req->errors = 0;
2833 req->hard_sector = req->sector = bio->bi_sector;
2834 req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio);
2835 req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio);
2836 req->nr_phys_segments = bio_phys_segments(req->q, bio);
2837 req->nr_hw_segments = bio_hw_segments(req->q, bio);
2838 req->buffer = bio_data(bio);
2839 req->waiting = NULL;
2840 req->bio = req->biotail = bio;
2841 req->ioprio = bio_prio(bio);
2842 req->rq_disk = bio->bi_bdev->bd_disk;
2843 req->start_time = jiffies;
2844}
2845
/*
 * Default make_request_fn installed by blk_init_queue_node().
 * @q:   the queue
 * @bio: the bio to queue
 *
 * Tries to merge @bio into an existing request (back or front); if that is
 * not possible, allocates a new request (sleeping if necessary), fills it
 * from the bio and adds it to the queue.
 *
 * Always returns 0.  A barrier bio on a queue with no ordered support is
 * completed immediately with -EOPNOTSUPP.
 */
static int __make_request(request_queue_t *q, struct bio *bio)
{
	struct request *req;
	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err, sync;
	unsigned short prio;
	sector_t sector;

	/* Sample the bio's parameters before it is (possibly) bounced. */
	sector = bio->bi_sector;
	nr_sectors = bio_sectors(bio);
	cur_nr_sectors = bio_cur_sectors(bio);
	prio = bio_prio(bio);

	rw = bio_data_dir(bio);
	sync = bio_sync(bio);

	/*
	 * blk_queue_bounce() takes &bio, so it may substitute a different
	 * bio; it is called before the queue lock is taken.
	 */
	blk_queue_bounce(q, &bio);

	spin_lock_prefetch(q->queue_lock);

	/* Barriers cannot be honoured on a queue without ordering support. */
	barrier = bio_barrier(bio);
	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	spin_lock_irq(q->queue_lock);

	/* Barriers are never merged; an empty queue has nothing to merge with. */
	if (unlikely(barrier) || elv_queue_empty(q))
		goto get_rq;

	el_ret = elv_merge(q, &req, bio);
	switch (el_ret) {
		case ELEVATOR_BACK_MERGE:
			BUG_ON(!rq_mergeable(req));

			/* The queue's hook may veto; then fall back to a new request. */
			if (!q->back_merge_fn(q, req, bio))
				break;

			blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);

			/* Append the bio to the tail of the request. */
			req->biotail->bi_next = bio;
			req->biotail = bio;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			/*
			 * The grown request may now touch its successor; if
			 * no request-request merge happened, tell the
			 * elevator this request changed.
			 */
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req);
			goto out;

		case ELEVATOR_FRONT_MERGE:
			BUG_ON(!rq_mergeable(req));

			if (!q->front_merge_fn(q, req, bio))
				break;

			blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);

			/* Prepend the bio to the head of the request. */
			bio->bi_next = req->bio;
			req->bio = bio;

			/*
			 * The request now starts at the new bio, so its
			 * buffer, current-segment sizes and start sector all
			 * come from that bio.
			 */
			req->buffer = bio_data(bio);
			req->current_nr_sectors = cur_nr_sectors;
			req->hard_cur_sectors = cur_nr_sectors;
			req->sector = req->hard_sector = sector;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			req->ioprio = ioprio_best(req->ioprio, prio);
			drive_stat_acct(req, nr_sectors, 0);
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req);
			goto out;

		/* ELV_NO_MERGE: elevator says don't/can't merge. */
		default:
			;
	}

get_rq:
	/*
	 * Grab a free request, sleeping if necessary.  The queue lock is held
	 * on entry here and is taken again below, so get_request_wait()
	 * evidently returns with it released.
	 */
	req = get_request_wait(q, rw, bio);

	/*
	 * The request is filled in before the lock is re-taken; it is not yet
	 * on the queue at this point.
	 */
	init_request_from_bio(req, bio);

	spin_lock_irq(q->queue_lock);
	/* Plug an empty queue so following bios get a chance to merge. */
	if (elv_queue_empty(q))
		blk_plug_device(q);
	add_request(q, req);
out:
	/* Synchronous bios unplug the queue immediately. */
	if (sync)
		__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
	return 0;

end_io:
	/* Reached only before the queue lock was taken. */
	bio_endio(bio, nr_sectors << 9, err);
	return 0;
}
2962
2963
2964
2965
2966static inline void blk_partition_remap(struct bio *bio)
2967{
2968 struct block_device *bdev = bio->bi_bdev;
2969
2970 if (bdev != bdev->bd_contains) {
2971 struct hd_struct *p = bdev->bd_part;
2972 const int rw = bio_data_dir(bio);
2973
2974 p->sectors[rw] += bio_sectors(bio);
2975 p->ios[rw]++;
2976
2977 bio->bi_sector += p->start_sect;
2978 bio->bi_bdev = bdev->bd_contains;
2979 }
2980}
2981
2982static void handle_bad_sector(struct bio *bio)
2983{
2984 char b[BDEVNAME_SIZE];
2985
2986 printk(KERN_INFO "attempt to access beyond end of device\n");
2987 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
2988 bdevname(bio->bi_bdev, b),
2989 bio->bi_rw,
2990 (unsigned long long)bio->bi_sector + bio_sectors(bio),
2991 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
2992
2993 set_bit(BIO_EOF, &bio->bi_flags);
2994}
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
/*
 * generic_make_request - hand a bio to its device's make_request_fn
 * @bio: the bio describing the I/O
 *
 * Returns nothing; failures are reported by completing the bio with -EIO
 * through bio_endio().  May sleep (might_sleep()).
 *
 * The loop re-resolves the queue from bio->bi_bdev and resubmits for as
 * long as make_request_fn returns non-zero, which lets a make_request_fn
 * redirect the bio to another device.
 */
void generic_make_request(struct bio *bio)
{
	request_queue_t *q;
	sector_t maxsector;
	int ret, nr_sectors = bio_sectors(bio);
	dev_t old_dev;

	might_sleep();
	/* Test device or partition size, when known (non-zero). */
	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
	if (maxsector) {
		sector_t sector = bio->bi_sector;

		/* Written so that sector + nr_sectors is never computed (no overflow). */
		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
			/*
			 * The request runs past the end of the device: log
			 * it, flag the bio and fail it.  The goto jumps to
			 * the end_io label inside the loop body below.
			 */
			handle_bad_sector(bio);
			goto end_io;
		}
	}

	/*
	 * From here on maxsector is reused as "sector before the previous
	 * remap", with -1 meaning "no previous iteration"; old_dev holds the
	 * matching device number.  Both feed the remap trace below.
	 */
	maxsector = -1;
	old_dev = 0;
	do {
		char b[BDEVNAME_SIZE];

		q = bdev_get_queue(bio->bi_bdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access "
				"nonexistent block-device %s (%Lu)\n",
				bdevname(bio->bi_bdev, b),
				(long long) bio->bi_sector);
			/* Shared failure exit: complete the bio with -EIO and leave the loop. */
end_io:
			bio_endio(bio, bio->bi_size, -EIO);
			break;
		}

		/* The bio must fit the queue's hardware limit. */
		if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
			printk("bio too big device %s (%u > %u)\n", 
				bdevname(bio->bi_bdev, b),
				bio_sectors(bio),
				q->max_hw_sectors);
			goto end_io;
		}

		/* Queues flagged DEAD accept no new I/O. */
		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
			goto end_io;

		/*
		 * If this device has partitions, remap block n of partition p
		 * to block n+start(p) of the whole disk.
		 */
		blk_partition_remap(bio);

		/* Not the first pass: trace the remap from the previous device/sector. */
		if (maxsector != -1)
			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 
					    maxsector);

		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);

		/* Remember where the bio pointed for a possible next pass. */
		maxsector = bio->bi_sector;
		old_dev = bio->bi_bdev->bd_dev;

		ret = q->make_request_fn(q, bio);
	} while (ret);
}

EXPORT_SYMBOL(generic_make_request);
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111void submit_bio(int rw, struct bio *bio)
3112{
3113 int count = bio_sectors(bio);
3114
3115 BIO_BUG_ON(!bio->bi_size);
3116 BIO_BUG_ON(!bio->bi_io_vec);
3117 bio->bi_rw |= rw;
3118 if (rw & WRITE)
3119 count_vm_events(PGPGOUT, count);
3120 else
3121 count_vm_events(PGPGIN, count);
3122
3123 if (unlikely(block_dump)) {
3124 char b[BDEVNAME_SIZE];
3125 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
3126 current->comm, current->pid,
3127 (rw & WRITE) ? "WRITE" : "READ",
3128 (unsigned long long)bio->bi_sector,
3129 bdevname(bio->bi_bdev,b));
3130 }
3131
3132 generic_make_request(bio);
3133}
3134
3135EXPORT_SYMBOL(submit_bio);
3136
3137static void blk_recalc_rq_segments(struct request *rq)
3138{
3139 struct bio *bio, *prevbio = NULL;
3140 int nr_phys_segs, nr_hw_segs;
3141 unsigned int phys_size, hw_size;
3142 request_queue_t *q = rq->q;
3143
3144 if (!rq->bio)
3145 return;
3146
3147 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
3148 rq_for_each_bio(bio, rq) {
3149
3150 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
3151
3152 nr_phys_segs += bio_phys_segments(q, bio);
3153 nr_hw_segs += bio_hw_segments(q, bio);
3154 if (prevbio) {
3155 int pseg = phys_size + prevbio->bi_size + bio->bi_size;
3156 int hseg = hw_size + prevbio->bi_size + bio->bi_size;
3157
3158 if (blk_phys_contig_segment(q, prevbio, bio) &&
3159 pseg <= q->max_segment_size) {
3160 nr_phys_segs--;
3161 phys_size += prevbio->bi_size + bio->bi_size;
3162 } else
3163 phys_size = 0;
3164
3165 if (blk_hw_contig_segment(q, prevbio, bio) &&
3166 hseg <= q->max_segment_size) {
3167 nr_hw_segs--;
3168 hw_size += prevbio->bi_size + bio->bi_size;
3169 } else
3170 hw_size = 0;
3171 }
3172 prevbio = bio;
3173 }
3174
3175 rq->nr_phys_segments = nr_phys_segs;
3176 rq->nr_hw_segments = nr_hw_segs;
3177}
3178
3179static void blk_recalc_rq_sectors(struct request *rq, int nsect)
3180{
3181 if (blk_fs_request(rq)) {
3182 rq->hard_sector += nsect;
3183 rq->hard_nr_sectors -= nsect;
3184
3185
3186
3187
3188 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
3189 (rq->sector <= rq->hard_sector)) {
3190 rq->sector = rq->hard_sector;
3191 rq->nr_sectors = rq->hard_nr_sectors;
3192 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
3193 rq->current_nr_sectors = rq->hard_cur_sectors;
3194 rq->buffer = bio_data(rq->bio);
3195 }
3196
3197
3198
3199
3200
3201 if (rq->nr_sectors < rq->current_nr_sectors) {
3202 printk("blk: request botched\n");
3203 rq->nr_sectors = rq->current_nr_sectors;
3204 }
3205 }
3206}
3207
/*
 * Complete @nr_bytes of I/O on @req, ending whole bios where possible and
 * partially advancing the first unfinished one.
 * @req:      the request being processed
 * @uptodate: completion status; 0 means failure (see the error mapping below)
 * @nr_bytes: number of bytes to complete
 *
 * Returns 0 when the request has no bios left (fully completed), 1 when
 * part of it is still pending.
 */
static int __end_that_request_first(struct request *req, int uptodate,
				    int nr_bytes)
{
	int total_bytes, bio_nbytes, error, next_idx = 0;
	struct bio *bio;

	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);

	/*
	 * Map the status to the error handed to bio_endio(): when
	 * end_io_error() flags it, 0 becomes -EIO and any other value is
	 * passed through unchanged.
	 */
	error = 0;
	if (end_io_error(uptodate))
		error = !uptodate ? -EIO : uptodate;

	/* For everything except pc requests the error count is reset on each call. */
	if (!blk_pc_request(req))
		req->errors = 0;

	if (!uptodate) {
		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
			printk("end_request: I/O error, dev %s, sector %llu\n",
				req->rq_disk ? req->rq_disk->disk_name : "?",
				(unsigned long long)req->sector);
	}

	/* Account the completed sectors to the disk, per direction. */
	if (blk_fs_request(req) && req->rq_disk) {
		const int rw = rq_data_dir(req);

		disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
	}

	/*
	 * total_bytes: bytes completed across the whole call.
	 * bio_nbytes:  bytes completed in the current, partially done bio.
	 * next_idx:    number of whole bio_vecs completed in that bio.
	 */
	total_bytes = bio_nbytes = 0;
	while ((bio = req->bio) != NULL) {
		int nbytes;

		if (nr_bytes >= bio->bi_size) {
			/* The whole bio is covered: unlink and end it. */
			req->bio = bio->bi_next;
			nbytes = bio->bi_size;
			if (!ordered_bio_endio(req, bio, nbytes, error))
				bio_endio(bio, nbytes, error);
			next_idx = 0;
			bio_nbytes = 0;
		} else {
			/* Partial bio: walk its bio_vecs one at a time. */
			int idx = bio->bi_idx + next_idx;

			if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
				blk_dump_rq_flags(req, "__end_that");
				printk("%s: bio idx %d >= vcnt %d\n",
						__FUNCTION__,
						bio->bi_idx, bio->bi_vcnt);
				break;
			}

			nbytes = bio_iovec_idx(bio, idx)->bv_len;
			BIO_BUG_ON(nbytes > bio->bi_size);

			/*
			 * Not even this bio_vec is fully covered: account the
			 * remainder and stop.  nr_bytes is left non-zero so
			 * the bio_vec can be advanced after the loop.
			 */
			if (unlikely(nbytes > nr_bytes)) {
				bio_nbytes += nr_bytes;
				total_bytes += nr_bytes;
				break;
			}

			/* This bio_vec is done; move on to the next one. */
			next_idx++;
			bio_nbytes += nbytes;
		}

		total_bytes += nbytes;
		nr_bytes -= nbytes;

		if ((bio = req->bio)) {
			/* Bios remain, but the byte budget is used up: done. */
			if (unlikely(nr_bytes <= 0))
				break;
		}
	}

	/* No bios left: the request is complete. */
	if (!req->bio)
		return 0;

	/*
	 * A bio was only partially completed: report the finished bytes,
	 * skip the finished bio_vecs and trim the one that was cut short.
	 */
	if (bio_nbytes) {
		if (!ordered_bio_endio(req, bio, bio_nbytes, error))
			bio_endio(bio, bio_nbytes, error);
		bio->bi_idx += next_idx;
		bio_iovec(bio)->bv_offset += nr_bytes;
		bio_iovec(bio)->bv_len -= nr_bytes;
	}

	/* Bring the request's sector and segment counts in line with what is left. */
	blk_recalc_rq_sectors(req, total_bytes >> 9);
	blk_recalc_rq_segments(req);
	return 1;
}
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
/**
 * end_that_request_first - complete a number of sectors of a request
 * @req:        the request being processed
 * @uptodate:   completion status, forwarded unchanged
 * @nr_sectors: number of 512-byte sectors to complete
 *
 * Converts @nr_sectors to bytes and hands off to
 * __end_that_request_first().
 *
 * Return: 0 when the request has no bios left, 1 when data is still pending.
 **/
int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
{
	const int nr_bytes = nr_sectors << 9;

	return __end_that_request_first(req, uptodate, nr_bytes);
}
3336
3337EXPORT_SYMBOL(end_that_request_first);
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
/**
 * end_that_request_chunk - complete a number of bytes of a request
 * @req:      the request being processed
 * @uptodate: completion status, forwarded unchanged
 * @nr_bytes: number of bytes to complete
 *
 * Byte-granular counterpart of end_that_request_first().
 *
 * Return: 0 when the request has no bios left, 1 when data is still pending.
 **/
int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
{
	int pending = __end_that_request_first(req, uptodate, nr_bytes);

	return pending;
}
3358
3359EXPORT_SYMBOL(end_that_request_chunk);
3360
3361
3362
3363
3364
3365static void blk_done_softirq(struct softirq_action *h)
3366{
3367 struct list_head *cpu_list, local_list;
3368
3369 local_irq_disable();
3370 cpu_list = &__get_cpu_var(blk_cpu_done);
3371 list_replace_init(cpu_list, &local_list);
3372 local_irq_enable();
3373
3374 while (!list_empty(&local_list)) {
3375 struct request *rq = list_entry(local_list.next, struct request, donelist);
3376
3377 list_del_init(&rq->donelist);
3378 rq->q->softirq_done_fn(rq);
3379 }
3380}
3381
3382#ifdef CONFIG_HOTPLUG_CPU
3383
3384static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
3385 void *hcpu)
3386{
3387
3388
3389
3390
3391 if (action == CPU_DEAD) {
3392 int cpu = (unsigned long) hcpu;
3393
3394 local_irq_disable();
3395 list_splice_init(&per_cpu(blk_cpu_done, cpu),
3396 &__get_cpu_var(blk_cpu_done));
3397 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3398 local_irq_enable();
3399 }
3400
3401 return NOTIFY_OK;
3402}
3403
3404
3405static struct notifier_block __devinitdata blk_cpu_notifier = {
3406 .notifier_call = blk_cpu_notify,
3407};
3408
3409#endif
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423void blk_complete_request(struct request *req)
3424{
3425 struct list_head *cpu_list;
3426 unsigned long flags;
3427
3428 BUG_ON(!req->q->softirq_done_fn);
3429
3430 local_irq_save(flags);
3431
3432 cpu_list = &__get_cpu_var(blk_cpu_done);
3433 list_add_tail(&req->donelist, cpu_list);
3434 raise_softirq_irqoff(BLOCK_SOFTIRQ);
3435
3436 local_irq_restore(flags);
3437}
3438
3439EXPORT_SYMBOL(blk_complete_request);
3440
3441
3442
3443
3444void end_that_request_last(struct request *req, int uptodate)
3445{
3446 struct gendisk *disk = req->rq_disk;
3447 int error;
3448
3449
3450
3451
3452 error = 0;
3453 if (end_io_error(uptodate))
3454 error = !uptodate ? -EIO : uptodate;
3455
3456 if (unlikely(laptop_mode) && blk_fs_request(req))
3457 laptop_io_completion();
3458
3459
3460
3461
3462
3463
3464 if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
3465 unsigned long duration = jiffies - req->start_time;
3466 const int rw = rq_data_dir(req);
3467
3468 __disk_stat_inc(disk, ios[rw]);
3469 __disk_stat_add(disk, ticks[rw], duration);
3470 disk_round_stats(disk);
3471 disk->in_flight--;
3472 }
3473 if (req->end_io)
3474 req->end_io(req, error);
3475 else
3476 __blk_put_request(req->q, req);
3477}
3478
3479EXPORT_SYMBOL(end_that_request_last);
3480
3481void end_request(struct request *req, int uptodate)
3482{
3483 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3484 add_disk_randomness(req->rq_disk);
3485 blkdev_dequeue_request(req);
3486 end_that_request_last(req, uptodate);
3487 }
3488}
3489
3490EXPORT_SYMBOL(end_request);
3491
3492void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3493{
3494
3495 rq->flags |= (bio->bi_rw & 3);
3496
3497 rq->nr_phys_segments = bio_phys_segments(q, bio);
3498 rq->nr_hw_segments = bio_hw_segments(q, bio);
3499 rq->current_nr_sectors = bio_cur_sectors(bio);
3500 rq->hard_cur_sectors = rq->current_nr_sectors;
3501 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3502 rq->buffer = bio_data(bio);
3503
3504 rq->bio = rq->biotail = bio;
3505}
3506
3507EXPORT_SYMBOL(blk_rq_bio_prep);
3508
3509int kblockd_schedule_work(struct work_struct *work)
3510{
3511 return queue_work(kblockd_workqueue, work);
3512}
3513
3514EXPORT_SYMBOL(kblockd_schedule_work);
3515
3516void kblockd_flush(void)
3517{
3518 flush_workqueue(kblockd_workqueue);
3519}
3520EXPORT_SYMBOL(kblockd_flush);
3521
3522int __init blk_dev_init(void)
3523{
3524 int i;
3525
3526 kblockd_workqueue = create_workqueue("kblockd");
3527 if (!kblockd_workqueue)
3528 panic("Failed to create kblockd\n");
3529
3530 request_cachep = kmem_cache_create("blkdev_requests",
3531 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL);
3532
3533 requestq_cachep = kmem_cache_create("blkdev_queue",
3534 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL);
3535
3536 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3537 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3538
3539 for_each_possible_cpu(i)
3540 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
3541
3542 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
3543 register_hotcpu_notifier(&blk_cpu_notifier);
3544
3545 blk_max_low_pfn = max_low_pfn;
3546 blk_max_pfn = max_pfn;
3547
3548 return 0;
3549}
3550
3551
3552
3553
3554void put_io_context(struct io_context *ioc)
3555{
3556 if (ioc == NULL)
3557 return;
3558
3559 BUG_ON(atomic_read(&ioc->refcount) == 0);
3560
3561 if (atomic_dec_and_test(&ioc->refcount)) {
3562 struct cfq_io_context *cic;
3563
3564 rcu_read_lock();
3565 if (ioc->aic && ioc->aic->dtor)
3566 ioc->aic->dtor(ioc->aic);
3567 if (ioc->cic_root.rb_node != NULL) {
3568 struct rb_node *n = rb_first(&ioc->cic_root);
3569
3570 cic = rb_entry(n, struct cfq_io_context, rb_node);
3571 cic->dtor(ioc);
3572 }
3573 rcu_read_unlock();
3574
3575 kmem_cache_free(iocontext_cachep, ioc);
3576 }
3577}
3578EXPORT_SYMBOL(put_io_context);
3579
3580
3581void exit_io_context(void)
3582{
3583 unsigned long flags;
3584 struct io_context *ioc;
3585 struct cfq_io_context *cic;
3586
3587 local_irq_save(flags);
3588 task_lock(current);
3589 ioc = current->io_context;
3590 current->io_context = NULL;
3591 ioc->task = NULL;
3592 task_unlock(current);
3593 local_irq_restore(flags);
3594
3595 if (ioc->aic && ioc->aic->exit)
3596 ioc->aic->exit(ioc->aic);
3597 if (ioc->cic_root.rb_node != NULL) {
3598 cic = rb_entry(rb_first(&ioc->cic_root), struct cfq_io_context, rb_node);
3599 cic->exit(ioc);
3600 }
3601
3602 put_io_context(ioc);
3603}
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613struct io_context *current_io_context(gfp_t gfp_flags)
3614{
3615 struct task_struct *tsk = current;
3616 struct io_context *ret;
3617
3618 ret = tsk->io_context;
3619 if (likely(ret))
3620 return ret;
3621
3622 ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
3623 if (ret) {
3624 atomic_set(&ret->refcount, 1);
3625 ret->task = current;
3626 ret->set_ioprio = NULL;
3627 ret->last_waited = jiffies;
3628 ret->nr_batch_requests = 0;
3629 ret->aic = NULL;
3630 ret->cic_root.rb_node = NULL;
3631
3632 smp_wmb();
3633 tsk->io_context = ret;
3634 }
3635
3636 return ret;
3637}
3638EXPORT_SYMBOL(current_io_context);
3639
3640
3641
3642
3643
3644
3645
3646struct io_context *get_io_context(gfp_t gfp_flags)
3647{
3648 struct io_context *ret;
3649 ret = current_io_context(gfp_flags);
3650 if (likely(ret))
3651 atomic_inc(&ret->refcount);
3652 return ret;
3653}
3654EXPORT_SYMBOL(get_io_context);
3655
3656void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3657{
3658 struct io_context *src = *psrc;
3659 struct io_context *dst = *pdst;
3660
3661 if (src) {
3662 BUG_ON(atomic_read(&src->refcount) == 0);
3663 atomic_inc(&src->refcount);
3664 put_io_context(dst);
3665 *pdst = src;
3666 }
3667}
3668EXPORT_SYMBOL(copy_io_context);
3669
/* Exchange the io_context pointers stored at @ioc1 and @ioc2. */
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
	struct io_context *first = *ioc1;
	struct io_context *second = *ioc2;

	*ioc1 = second;
	*ioc2 = first;
}
3677EXPORT_SYMBOL(swap_io_context);
3678
3679
3680
3681
/*
 * One sysfs attribute of a request queue: the generic attribute plus the
 * queue-specific show/store handlers.  queue_attr_show()/queue_attr_store()
 * return -EIO when the corresponding handler is NULL.
 */
struct queue_sysfs_entry {
	struct attribute attr;	/* embedded attribute; to_queue() maps back from it */
	ssize_t (*show)(struct request_queue *, char *);	/* format value into the page buffer */
	ssize_t (*store)(struct request_queue *, const char *, size_t);	/* parse value from the page buffer */
};
3687
/*
 * Format @var as an unsigned decimal number followed by a newline into
 * @page.  Returns the number of characters written, as reported by
 * sprintf().
 *
 * The conversion is %u to match the unsigned parameter; %d would print
 * values above INT_MAX as negative numbers.
 */
static ssize_t
queue_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%u\n", var);
}
3693
3694static ssize_t
3695queue_var_store(unsigned long *var, const char *page, size_t count)
3696{
3697 char *p = (char *) page;
3698
3699 *var = simple_strtoul(p, &p, 10);
3700 return count;
3701}
3702
3703static ssize_t queue_requests_show(struct request_queue *q, char *page)
3704{
3705 return queue_var_show(q->nr_requests, (page));
3706}
3707
3708static ssize_t
3709queue_requests_store(struct request_queue *q, const char *page, size_t count)
3710{
3711 struct request_list *rl = &q->rq;
3712 unsigned long nr;
3713 int ret = queue_var_store(&nr, page, count);
3714 if (nr < BLKDEV_MIN_RQ)
3715 nr = BLKDEV_MIN_RQ;
3716
3717 spin_lock_irq(q->queue_lock);
3718 q->nr_requests = nr;
3719 blk_queue_congestion_threshold(q);
3720
3721 if (rl->count[READ] >= queue_congestion_on_threshold(q))
3722 set_queue_congested(q, READ);
3723 else if (rl->count[READ] < queue_congestion_off_threshold(q))
3724 clear_queue_congested(q, READ);
3725
3726 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
3727 set_queue_congested(q, WRITE);
3728 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
3729 clear_queue_congested(q, WRITE);
3730
3731 if (rl->count[READ] >= q->nr_requests) {
3732 blk_set_queue_full(q, READ);
3733 } else if (rl->count[READ]+1 <= q->nr_requests) {
3734 blk_clear_queue_full(q, READ);
3735 wake_up(&rl->wait[READ]);
3736 }
3737
3738 if (rl->count[WRITE] >= q->nr_requests) {
3739 blk_set_queue_full(q, WRITE);
3740 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
3741 blk_clear_queue_full(q, WRITE);
3742 wake_up(&rl->wait[WRITE]);
3743 }
3744 spin_unlock_irq(q->queue_lock);
3745 return ret;
3746}
3747
3748static ssize_t queue_ra_show(struct request_queue *q, char *page)
3749{
3750 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3751
3752 return queue_var_show(ra_kb, (page));
3753}
3754
3755static ssize_t
3756queue_ra_store(struct request_queue *q, const char *page, size_t count)
3757{
3758 unsigned long ra_kb;
3759 ssize_t ret = queue_var_store(&ra_kb, page, count);
3760
3761 spin_lock_irq(q->queue_lock);
3762 if (ra_kb > (q->max_sectors >> 1))
3763 ra_kb = (q->max_sectors >> 1);
3764
3765 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3766 spin_unlock_irq(q->queue_lock);
3767
3768 return ret;
3769}
3770
3771static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
3772{
3773 int max_sectors_kb = q->max_sectors >> 1;
3774
3775 return queue_var_show(max_sectors_kb, (page));
3776}
3777
3778static ssize_t
3779queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
3780{
3781 unsigned long max_sectors_kb,
3782 max_hw_sectors_kb = q->max_hw_sectors >> 1,
3783 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
3784 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
3785 int ra_kb;
3786
3787 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
3788 return -EINVAL;
3789
3790
3791
3792
3793 spin_lock_irq(q->queue_lock);
3794
3795
3796
3797 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3798 if (ra_kb > max_sectors_kb)
3799 q->backing_dev_info.ra_pages =
3800 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
3801
3802 q->max_sectors = max_sectors_kb << 1;
3803 spin_unlock_irq(q->queue_lock);
3804
3805 return ret;
3806}
3807
3808static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
3809{
3810 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
3811
3812 return queue_var_show(max_hw_sectors_kb, (page));
3813}
3814
3815
3816static struct queue_sysfs_entry queue_requests_entry = {
3817 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
3818 .show = queue_requests_show,
3819 .store = queue_requests_store,
3820};
3821
3822static struct queue_sysfs_entry queue_ra_entry = {
3823 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
3824 .show = queue_ra_show,
3825 .store = queue_ra_store,
3826};
3827
3828static struct queue_sysfs_entry queue_max_sectors_entry = {
3829 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
3830 .show = queue_max_sectors_show,
3831 .store = queue_max_sectors_store,
3832};
3833
3834static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
3835 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
3836 .show = queue_max_hw_sectors_show,
3837};
3838
3839static struct queue_sysfs_entry queue_iosched_entry = {
3840 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
3841 .show = elv_iosched_show,
3842 .store = elv_iosched_store,
3843};
3844
3845static struct attribute *default_attrs[] = {
3846 &queue_requests_entry.attr,
3847 &queue_ra_entry.attr,
3848 &queue_max_hw_sectors_entry.attr,
3849 &queue_max_sectors_entry.attr,
3850 &queue_iosched_entry.attr,
3851 NULL,
3852};
3853
/* Map a struct attribute pointer back to its enclosing queue_sysfs_entry. */
#define to_queue(a) container_of((a), struct queue_sysfs_entry, attr)
3855
3856static ssize_t
3857queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3858{
3859 struct queue_sysfs_entry *entry = to_queue(attr);
3860 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
3861 ssize_t res;
3862
3863 if (!entry->show)
3864 return -EIO;
3865 mutex_lock(&q->sysfs_lock);
3866 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
3867 mutex_unlock(&q->sysfs_lock);
3868 return -ENOENT;
3869 }
3870 res = entry->show(q, page);
3871 mutex_unlock(&q->sysfs_lock);
3872 return res;
3873}
3874
3875static ssize_t
3876queue_attr_store(struct kobject *kobj, struct attribute *attr,
3877 const char *page, size_t length)
3878{
3879 struct queue_sysfs_entry *entry = to_queue(attr);
3880 request_queue_t *q = container_of(kobj, struct request_queue, kobj);
3881
3882 ssize_t res;
3883
3884 if (!entry->store)
3885 return -EIO;
3886 mutex_lock(&q->sysfs_lock);
3887 if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
3888 mutex_unlock(&q->sysfs_lock);
3889 return -ENOENT;
3890 }
3891 res = entry->store(q, page, length);
3892 mutex_unlock(&q->sysfs_lock);
3893 return res;
3894}
3895
3896static struct sysfs_ops queue_sysfs_ops = {
3897 .show = queue_attr_show,
3898 .store = queue_attr_store,
3899};
3900
3901static struct kobj_type queue_ktype = {
3902 .sysfs_ops = &queue_sysfs_ops,
3903 .default_attrs = default_attrs,
3904 .release = blk_release_queue,
3905};
3906
3907int blk_register_queue(struct gendisk *disk)
3908{
3909 int ret;
3910
3911 request_queue_t *q = disk->queue;
3912
3913 if (!q || !q->request_fn)
3914 return -ENXIO;
3915
3916 q->kobj.parent = kobject_get(&disk->kobj);
3917
3918 ret = kobject_add(&q->kobj);
3919 if (ret < 0)
3920 return ret;
3921
3922 kobject_uevent(&q->kobj, KOBJ_ADD);
3923
3924 ret = elv_register_queue(q);
3925 if (ret) {
3926 kobject_uevent(&q->kobj, KOBJ_REMOVE);
3927 kobject_del(&q->kobj);
3928 return ret;
3929 }
3930
3931 return 0;
3932}
3933
3934void blk_unregister_queue(struct gendisk *disk)
3935{
3936 request_queue_t *q = disk->queue;
3937
3938 if (q && q->request_fn) {
3939 elv_unregister_queue(q);
3940
3941 kobject_uevent(&q->kobj, KOBJ_REMOVE);
3942 kobject_del(&q->kobj);
3943 kobject_put(&disk->kobj);
3944 }
3945}
3946