1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include <linux/config.h>
16#include <linux/kernel.h>
17#include <linux/module.h>
18#include <linux/backing-dev.h>
19#include <linux/bio.h>
20#include <linux/blkdev.h>
21#include <linux/highmem.h>
22#include <linux/mm.h>
23#include <linux/kernel_stat.h>
24#include <linux/string.h>
25#include <linux/init.h>
26#include <linux/bootmem.h>
27#include <linux/completion.h>
28#include <linux/slab.h>
29#include <linux/swap.h>
30#include <linux/writeback.h>
31
32
33
34
35#include <scsi/scsi_cmnd.h>
36
/*
 * Forward declarations: both handlers are installed on a queue by
 * blk_queue_make_request() before their definitions appear in this file.
 */
static void blk_unplug_work(void *data);
static void blk_unplug_timeout(unsigned long data);
39
40
41
42
/*
 * Slab cache handed to mempool_create() as the backing store of every
 * queue's request pool (see blk_init_free_list()).
 */
static kmem_cache_t *request_cachep;
44
45
46
47
/*
 * Slab cache from which request queues themselves are allocated
 * (blk_alloc_queue()) and to which they are returned (blk_cleanup_queue()).
 */
static kmem_cache_t *requestq_cachep;
49
50
51
52
/*
 * NOTE(review): presumably the slab cache for struct io_context objects;
 * it is not referenced in this part of the file -- confirm against its
 * users further down.
 */
static kmem_cache_t *iocontext_cachep;
54
/*
 * Wait queues indexed by data direction (rw).  clear_queue_congested()
 * wakes the matching queue after it clears a congestion bit.
 */
static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};
59
60
61
62
/*
 * Workqueue used for deferred block-layer work.
 * NOTE(review): presumably what kblockd_schedule_work()/kblockd_flush()
 * operate on; their definitions are outside this part of the file.
 */
static struct workqueue_struct *kblockd_workqueue;
64
/*
 * Exported page-frame-number limits.  blk_queue_bounce_limit() compares a
 * queue's bounce pfn against blk_max_low_pfn to decide whether bounce
 * buffers must come from GFP_DMA memory.  blk_max_pfn is not used in this
 * part of the file.
 */
unsigned long blk_max_low_pfn, blk_max_pfn;
66
67EXPORT_SYMBOL(blk_max_low_pfn);
68EXPORT_SYMBOL(blk_max_pfn);
69
70
/*
 * Length, in jiffies (HZ/50 = 20ms), of the window after ->last_waited
 * during which ioc_batching() still treats an io_context with remaining
 * batch credits as a batcher.
 */
#define BLK_BATCH_TIME	(HZ/50UL)
72
73
/* Default value of q->nr_batching, set by blk_queue_make_request(). */
#define BLK_BATCH_REQ	32
75
76
77
78
79
80
81static inline int queue_congestion_on_threshold(struct request_queue *q)
82{
83 return q->nr_congestion_on;
84}
85
86
87
88
89static inline int queue_congestion_off_threshold(struct request_queue *q)
90{
91 return q->nr_congestion_off;
92}
93
94static void blk_queue_congestion_threshold(struct request_queue *q)
95{
96 int nr;
97
98 nr = q->nr_requests - (q->nr_requests / 8) + 1;
99 if (nr > q->nr_requests)
100 nr = q->nr_requests;
101 q->nr_congestion_on = nr;
102
103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
104 if (nr < 1)
105 nr = 1;
106 q->nr_congestion_off = nr;
107}
108
109
110
111
112
113
114static void clear_queue_congested(request_queue_t *q, int rw)
115{
116 enum bdi_state bit;
117 wait_queue_head_t *wqh = &congestion_wqh[rw];
118
119 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
120 clear_bit(bit, &q->backing_dev_info.state);
121 smp_mb__after_clear_bit();
122 if (waitqueue_active(wqh))
123 wake_up(wqh);
124}
125
126
127
128
129
130static void set_queue_congested(request_queue_t *q, int rw)
131{
132 enum bdi_state bit;
133
134 bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
135 set_bit(bit, &q->backing_dev_info.state);
136}
137
138
139
140
141
142
143
144
145
146
147struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
148{
149 struct backing_dev_info *ret = NULL;
150 request_queue_t *q = bdev_get_queue(bdev);
151
152 if (q)
153 ret = &q->backing_dev_info;
154 return ret;
155}
156
157EXPORT_SYMBOL(blk_get_backing_dev_info);
158
159void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
160{
161 q->activity_fn = fn;
162 q->activity_data = data;
163}
164
165EXPORT_SYMBOL(blk_queue_activity_fn);
166
167
168
169
170
171
172
173
174
175
176
177
178void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
179{
180 q->prep_rq_fn = pfn;
181}
182
183EXPORT_SYMBOL(blk_queue_prep_rq);
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
202{
203 q->merge_bvec_fn = mbfn;
204}
205
206EXPORT_SYMBOL(blk_queue_merge_bvec);
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
231{
232
233
234
235 q->nr_requests = BLKDEV_MAX_RQ;
236 q->max_phys_segments = MAX_PHYS_SEGMENTS;
237 q->max_hw_segments = MAX_HW_SEGMENTS;
238 q->make_request_fn = mfn;
239 q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
240 q->backing_dev_info.state = 0;
241 q->backing_dev_info.memory_backed = 0;
242 blk_queue_max_sectors(q, MAX_SECTORS);
243 blk_queue_hardsect_size(q, 512);
244 blk_queue_dma_alignment(q, 511);
245 blk_queue_congestion_threshold(q);
246 q->nr_batching = BLK_BATCH_REQ;
247
248 q->unplug_thresh = 4;
249 q->unplug_delay = (3 * HZ) / 1000;
250 if (q->unplug_delay == 0)
251 q->unplug_delay = 1;
252
253 INIT_WORK(&q->unplug_work, blk_unplug_work, q);
254
255 q->unplug_timer.function = blk_unplug_timeout;
256 q->unplug_timer.data = (unsigned long)q;
257
258
259
260
261 blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
262
263 blk_queue_activity_fn(q, NULL, NULL);
264
265 INIT_LIST_HEAD(&q->drain_list);
266}
267
268EXPORT_SYMBOL(blk_queue_make_request);
269
270
271
272
273
274
275
276
277
278
279
280
281
282void blk_queue_ordered(request_queue_t *q, int flag)
283{
284 if (flag)
285 set_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
286 else
287 clear_bit(QUEUE_FLAG_ORDERED, &q->queue_flags);
288}
289
290EXPORT_SYMBOL(blk_queue_ordered);
291
292
293
294
295
296
297
298
299
300
301
302void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
303{
304 q->issue_flush_fn = iff;
305}
306
307EXPORT_SYMBOL(blk_queue_issue_flush_fn);
308
309
310
311
312
313
314
315
316
317
318
319
320
321void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
322{
323 unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
324
325
326
327
328
329
330 if (bounce_pfn < blk_max_low_pfn) {
331 BUG_ON(dma_addr < BLK_BOUNCE_ISA);
332 init_emergency_isa_pool();
333 q->bounce_gfp = GFP_NOIO | GFP_DMA;
334 } else
335 q->bounce_gfp = GFP_NOIO;
336
337 q->bounce_pfn = bounce_pfn;
338}
339
340EXPORT_SYMBOL(blk_queue_bounce_limit);
341
342
343
344
345
346
347
348
349
350
351void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
352{
353 if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
354 max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
355 printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
356 }
357
358 q->max_sectors = q->max_hw_sectors = max_sectors;
359}
360
361EXPORT_SYMBOL(blk_queue_max_sectors);
362
363
364
365
366
367
368
369
370
371
372
373void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
374{
375 if (!max_segments) {
376 max_segments = 1;
377 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
378 }
379
380 q->max_phys_segments = max_segments;
381}
382
383EXPORT_SYMBOL(blk_queue_max_phys_segments);
384
385
386
387
388
389
390
391
392
393
394
395
396void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
397{
398 if (!max_segments) {
399 max_segments = 1;
400 printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
401 }
402
403 q->max_hw_segments = max_segments;
404}
405
406EXPORT_SYMBOL(blk_queue_max_hw_segments);
407
408
409
410
411
412
413
414
415
416
417void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
418{
419 if (max_size < PAGE_CACHE_SIZE) {
420 max_size = PAGE_CACHE_SIZE;
421 printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
422 }
423
424 q->max_segment_size = max_size;
425}
426
427EXPORT_SYMBOL(blk_queue_max_segment_size);
428
429
430
431
432
433
434
435
436
437
438
439
440void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
441{
442 q->hardsect_size = size;
443}
444
445EXPORT_SYMBOL(blk_queue_hardsect_size);
446
447
448
449
/*
 * Minimum of @l and @r where zero means "no limit": if one side is zero
 * the other side is returned, otherwise the smaller of the two.
 *
 * Every argument and the whole expansion are parenthesized so the macro
 * can be used inside a larger expression.  Arguments are still evaluated
 * more than once, so they must be free of side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
451
452
453
454
455
456
457void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
458{
459
460 t->max_sectors = t->max_hw_sectors =
461 min_not_zero(t->max_sectors,b->max_sectors);
462
463 t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
464 t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
465 t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
466 t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
467}
468
469EXPORT_SYMBOL(blk_queue_stack_limits);
470
471
472
473
474
475
476void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
477{
478 if (mask < PAGE_CACHE_SIZE - 1) {
479 mask = PAGE_CACHE_SIZE - 1;
480 printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
481 }
482
483 q->seg_boundary_mask = mask;
484}
485
486EXPORT_SYMBOL(blk_queue_segment_boundary);
487
488
489
490
491
492
493
494
495
496
497
498void blk_queue_dma_alignment(request_queue_t *q, int mask)
499{
500 q->dma_alignment = mask;
501}
502
503EXPORT_SYMBOL(blk_queue_dma_alignment);
504
505
506
507
508
509
510
511
512
513
514
515
516
517struct request *blk_queue_find_tag(request_queue_t *q, int tag)
518{
519 struct blk_queue_tag *bqt = q->queue_tags;
520
521 if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
522 return NULL;
523
524 return bqt->tag_index[tag];
525}
526
527EXPORT_SYMBOL(blk_queue_find_tag);
528
529
530
531
532
533
534
535
536
537static void __blk_queue_free_tags(request_queue_t *q)
538{
539 struct blk_queue_tag *bqt = q->queue_tags;
540
541 if (!bqt)
542 return;
543
544 if (atomic_dec_and_test(&bqt->refcnt)) {
545 BUG_ON(bqt->busy);
546 BUG_ON(!list_empty(&bqt->busy_list));
547
548 kfree(bqt->tag_index);
549 bqt->tag_index = NULL;
550
551 kfree(bqt->tag_map);
552 bqt->tag_map = NULL;
553
554 kfree(bqt);
555 }
556
557 q->queue_tags = NULL;
558 q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
559}
560
561
562
563
564
565
566
567
568
569void blk_queue_free_tags(request_queue_t *q)
570{
571 clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
572}
573
574EXPORT_SYMBOL(blk_queue_free_tags);
575
576static int
577init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
578{
579 int bits, i;
580 struct request **tag_index;
581 unsigned long *tag_map;
582
583 if (depth > q->nr_requests * 2) {
584 depth = q->nr_requests * 2;
585 printk(KERN_ERR "%s: adjusted depth to %d\n",
586 __FUNCTION__, depth);
587 }
588
589 tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
590 if (!tag_index)
591 goto fail;
592
593 bits = (depth / BLK_TAGS_PER_LONG) + 1;
594 tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC);
595 if (!tag_map)
596 goto fail;
597
598 memset(tag_index, 0, depth * sizeof(struct request *));
599 memset(tag_map, 0, bits * sizeof(unsigned long));
600 tags->max_depth = depth;
601 tags->real_max_depth = bits * BITS_PER_LONG;
602 tags->tag_index = tag_index;
603 tags->tag_map = tag_map;
604
605
606
607
608 for (i = depth; i < bits * BLK_TAGS_PER_LONG; i++)
609 __set_bit(i, tag_map);
610
611 return 0;
612fail:
613 kfree(tag_index);
614 return -ENOMEM;
615}
616
617
618
619
620
621
622int blk_queue_init_tags(request_queue_t *q, int depth,
623 struct blk_queue_tag *tags)
624{
625 int rc;
626
627 BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
628
629 if (!tags && !q->queue_tags) {
630 tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
631 if (!tags)
632 goto fail;
633
634 if (init_tag_map(q, tags, depth))
635 goto fail;
636
637 INIT_LIST_HEAD(&tags->busy_list);
638 tags->busy = 0;
639 atomic_set(&tags->refcnt, 1);
640 } else if (q->queue_tags) {
641 if ((rc = blk_queue_resize_tags(q, depth)))
642 return rc;
643 set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
644 return 0;
645 } else
646 atomic_inc(&tags->refcnt);
647
648
649
650
651 q->queue_tags = tags;
652 q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
653 return 0;
654fail:
655 kfree(tags);
656 return -ENOMEM;
657}
658
659EXPORT_SYMBOL(blk_queue_init_tags);
660
661
662
663
664
665
666
667
668
669int blk_queue_resize_tags(request_queue_t *q, int new_depth)
670{
671 struct blk_queue_tag *bqt = q->queue_tags;
672 struct request **tag_index;
673 unsigned long *tag_map;
674 int bits, max_depth;
675
676 if (!bqt)
677 return -ENXIO;
678
679
680
681
682 if (new_depth <= bqt->real_max_depth) {
683 bqt->max_depth = new_depth;
684 return 0;
685 }
686
687
688
689
690 tag_index = bqt->tag_index;
691 tag_map = bqt->tag_map;
692 max_depth = bqt->real_max_depth;
693
694 if (init_tag_map(q, bqt, new_depth))
695 return -ENOMEM;
696
697 memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
698 bits = max_depth / BLK_TAGS_PER_LONG;
699 memcpy(bqt->tag_map, tag_map, bits * sizeof(unsigned long));
700
701 kfree(tag_index);
702 kfree(tag_map);
703 return 0;
704}
705
706EXPORT_SYMBOL(blk_queue_resize_tags);
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722void blk_queue_end_tag(request_queue_t *q, struct request *rq)
723{
724 struct blk_queue_tag *bqt = q->queue_tags;
725 int tag = rq->tag;
726
727 BUG_ON(tag == -1);
728
729 if (unlikely(tag >= bqt->real_max_depth))
730 return;
731
732 if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
733 printk("attempt to clear non-busy tag (%d)\n", tag);
734 return;
735 }
736
737 list_del_init(&rq->queuelist);
738 rq->flags &= ~REQ_QUEUED;
739 rq->tag = -1;
740
741 if (unlikely(bqt->tag_index[tag] == NULL))
742 printk("tag %d is missing\n", tag);
743
744 bqt->tag_index[tag] = NULL;
745 bqt->busy--;
746}
747
748EXPORT_SYMBOL(blk_queue_end_tag);
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768int blk_queue_start_tag(request_queue_t *q, struct request *rq)
769{
770 struct blk_queue_tag *bqt = q->queue_tags;
771 unsigned long *map = bqt->tag_map;
772 int tag = 0;
773
774 if (unlikely((rq->flags & REQ_QUEUED))) {
775 printk(KERN_ERR
776 "request %p for device [%s] already tagged %d",
777 rq, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
778 BUG();
779 }
780
781 for (map = bqt->tag_map; *map == -1UL; map++) {
782 tag += BLK_TAGS_PER_LONG;
783
784 if (tag >= bqt->max_depth)
785 return 1;
786 }
787
788 tag += ffz(*map);
789 __set_bit(tag, bqt->tag_map);
790
791 rq->flags |= REQ_QUEUED;
792 rq->tag = tag;
793 bqt->tag_index[tag] = rq;
794 blkdev_dequeue_request(rq);
795 list_add(&rq->queuelist, &bqt->busy_list);
796 bqt->busy++;
797 return 0;
798}
799
800EXPORT_SYMBOL(blk_queue_start_tag);
801
802
803
804
805
806
807
808
809
810
811
812
813
814void blk_queue_invalidate_tags(request_queue_t *q)
815{
816 struct blk_queue_tag *bqt = q->queue_tags;
817 struct list_head *tmp, *n;
818 struct request *rq;
819
820 list_for_each_safe(tmp, n, &bqt->busy_list) {
821 rq = list_entry_rq(tmp);
822
823 if (rq->tag == -1) {
824 printk("bad tag found on list\n");
825 list_del_init(&rq->queuelist);
826 rq->flags &= ~REQ_QUEUED;
827 } else
828 blk_queue_end_tag(q, rq);
829
830 rq->flags &= ~REQ_STARTED;
831 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
832 }
833}
834
835EXPORT_SYMBOL(blk_queue_invalidate_tags);
836
/*
 * Printable names of the request flag bits, indexed by bit number.
 * blk_dump_rq_flags() indexes this table with every bit below
 * __REQ_NR_BITS, so it needs one entry per flag bit, in bit order.
 */
static char *rq_flags[] = {
	"REQ_RW",
	"REQ_FAILFAST",
	"REQ_SOFTBARRIER",
	"REQ_HARDBARRIER",
	"REQ_CMD",
	"REQ_NOMERGE",
	"REQ_STARTED",
	"REQ_DONTPREP",
	"REQ_QUEUED",
	"REQ_PC",
	"REQ_BLOCK_PC",
	"REQ_SENSE",
	"REQ_FAILED",
	"REQ_QUIET",
	"REQ_SPECIAL",
	"REQ_DRIVE_CMD",
	"REQ_DRIVE_TASK",
	"REQ_DRIVE_TASKFILE",
	"REQ_PREEMPT",
	"REQ_PM_SUSPEND",
	"REQ_PM_RESUME",
	"REQ_PM_SHUTDOWN",
};
861
862void blk_dump_rq_flags(struct request *rq, char *msg)
863{
864 int bit;
865
866 printk("%s: dev %s: flags = ", msg,
867 rq->rq_disk ? rq->rq_disk->disk_name : "?");
868 bit = 0;
869 do {
870 if (rq->flags & (1 << bit))
871 printk("%s ", rq_flags[bit]);
872 bit++;
873 } while (bit < __REQ_NR_BITS);
874
875 printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
876 rq->nr_sectors,
877 rq->current_nr_sectors);
878 printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
879
880 if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
881 printk("cdb: ");
882 for (bit = 0; bit < sizeof(rq->cmd); bit++)
883 printk("%02x ", rq->cmd[bit]);
884 printk("\n");
885 }
886}
887
888EXPORT_SYMBOL(blk_dump_rq_flags);
889
/*
 * Recompute the physical and hardware segment counts of @bio for queue
 * @q and store them in the bio, together with the sizes of its first and
 * last hardware segment (bi_hw_front_size / bi_hw_back_size).  On return
 * BIO_SEG_VALID is set.  A bio without a bi_io_vec is left untouched.
 */
void blk_recount_segments(request_queue_t *q, struct bio *bio)
{
	struct bio_vec *bv, *bvprv = NULL;
	int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
	/* highprv starts at 1 so the first bio_vec always opens a hw segment */
	int high, highprv = 1;

	if (unlikely(!bio->bi_io_vec))
		return;

	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
	hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
	bio_for_each_segment(bv, bio, i) {
		/*
		 * A page at or above the bounce limit never shares a segment
		 * with its neighbours: it, and the vector after it, always
		 * start a new hardware segment.
		 */
		high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
		if (high || highprv)
			goto new_hw_segment;
		if (cluster) {
			/*
			 * With clustering enabled try to extend the current
			 * physical segment; any failed check starts a new one.
			 */
			if (seg_size + bv->bv_len > q->max_segment_size)
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
				goto new_segment;
			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
				goto new_hw_segment;

			/* merged: both running sizes grow, no count changes */
			seg_size += bv->bv_len;
			hw_seg_size += bv->bv_len;
			bvprv = bv;
			continue;
		}
new_segment:
		/* new physical segment; it may still extend the hw segment */
		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
			hw_seg_size += bv->bv_len;
		} else {
new_hw_segment:
			/*
			 * Closing a hw segment: record its size as the front
			 * size if it exceeds the value stored so far.
			 */
			if (hw_seg_size > bio->bi_hw_front_size)
				bio->bi_hw_front_size = hw_seg_size;
			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
			nr_hw_segs++;
		}

		nr_phys_segs++;
		bvprv = bv;
		seg_size = bv->bv_len;
		highprv = high;
	}
	/* the hw segment still open at the end is the back segment */
	if (hw_seg_size > bio->bi_hw_back_size)
		bio->bi_hw_back_size = hw_seg_size;
	/* with a single hw segment, front and back are the same segment */
	if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
		bio->bi_hw_front_size = hw_seg_size;
	bio->bi_phys_segments = nr_phys_segs;
	bio->bi_hw_segments = nr_hw_segs;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
950
951
952int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
953 struct bio *nxt)
954{
955 if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER)))
956 return 0;
957
958 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
959 return 0;
960 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
961 return 0;
962
963
964
965
966
967 if (BIO_SEG_BOUNDARY(q, bio, nxt))
968 return 1;
969
970 return 0;
971}
972
973EXPORT_SYMBOL(blk_phys_contig_segment);
974
975int blk_hw_contig_segment(request_queue_t *q, struct bio *bio,
976 struct bio *nxt)
977{
978 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
979 blk_recount_segments(q, bio);
980 if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
981 blk_recount_segments(q, nxt);
982 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
983 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_front_size + bio->bi_hw_back_size))
984 return 0;
985 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
986 return 0;
987
988 return 1;
989}
990
991EXPORT_SYMBOL(blk_hw_contig_segment);
992
993
994
995
996
/*
 * Fill the scatterlist @sg from the bios of request @rq and return the
 * number of entries used.  With clustering enabled on @q, a bio_vec is
 * appended to the previous entry when the size limit, physical
 * contiguity and the segment boundary all allow it; otherwise it gets an
 * entry of its own.
 *
 * NOTE(review): no bound on @sg is checked here -- presumably the caller
 * sizes it from the request's segment count; confirm against callers.
 */
int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg)
{
	struct bio_vec *bvec, *bvprv;
	struct bio *bio;
	int nsegs, i, cluster;

	nsegs = 0;
	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);

	/* bvprv stays NULL until the first bio_vec has been mapped */
	bvprv = NULL;
	rq_for_each_bio(bio, rq) {
		/* walk every bio_vec of every bio in the request */
		bio_for_each_segment(bvec, bio, i) {
			int nbytes = bvec->bv_len;

			if (bvprv && cluster) {
				/* try to grow the previous sg entry */
				if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
					goto new_segment;

				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
					goto new_segment;

				sg[nsegs - 1].length += nbytes;
			} else {
new_segment:
				/* start a fresh, zeroed entry for this bio_vec */
				memset(&sg[nsegs],0,sizeof(struct scatterlist));
				sg[nsegs].page = bvec->bv_page;
				sg[nsegs].length = nbytes;
				sg[nsegs].offset = bvec->bv_offset;

				nsegs++;
			}
			bvprv = bvec;
		}
	}

	return nsegs;
}
1042
1043EXPORT_SYMBOL(blk_rq_map_sg);
1044
1045
1046
1047
1048
1049
1050static inline int ll_new_mergeable(request_queue_t *q,
1051 struct request *req,
1052 struct bio *bio)
1053{
1054 int nr_phys_segs = bio_phys_segments(q, bio);
1055
1056 if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1057 req->flags |= REQ_NOMERGE;
1058 if (req == q->last_merge)
1059 q->last_merge = NULL;
1060 return 0;
1061 }
1062
1063
1064
1065
1066
1067 req->nr_phys_segments += nr_phys_segs;
1068 return 1;
1069}
1070
1071static inline int ll_new_hw_segment(request_queue_t *q,
1072 struct request *req,
1073 struct bio *bio)
1074{
1075 int nr_hw_segs = bio_hw_segments(q, bio);
1076 int nr_phys_segs = bio_phys_segments(q, bio);
1077
1078 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
1079 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
1080 req->flags |= REQ_NOMERGE;
1081 if (req == q->last_merge)
1082 q->last_merge = NULL;
1083 return 0;
1084 }
1085
1086
1087
1088
1089
1090 req->nr_hw_segments += nr_hw_segs;
1091 req->nr_phys_segments += nr_phys_segs;
1092 return 1;
1093}
1094
1095static int ll_back_merge_fn(request_queue_t *q, struct request *req,
1096 struct bio *bio)
1097{
1098 int len;
1099
1100 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
1101 req->flags |= REQ_NOMERGE;
1102 if (req == q->last_merge)
1103 q->last_merge = NULL;
1104 return 0;
1105 }
1106 if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
1107 blk_recount_segments(q, req->biotail);
1108 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1109 blk_recount_segments(q, bio);
1110 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
1111 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) &&
1112 !BIOVEC_VIRT_OVERSIZE(len)) {
1113 int mergeable = ll_new_mergeable(q, req, bio);
1114
1115 if (mergeable) {
1116 if (req->nr_hw_segments == 1)
1117 req->bio->bi_hw_front_size = len;
1118 if (bio->bi_hw_segments == 1)
1119 bio->bi_hw_back_size = len;
1120 }
1121 return mergeable;
1122 }
1123
1124 return ll_new_hw_segment(q, req, bio);
1125}
1126
1127static int ll_front_merge_fn(request_queue_t *q, struct request *req,
1128 struct bio *bio)
1129{
1130 int len;
1131
1132 if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
1133 req->flags |= REQ_NOMERGE;
1134 if (req == q->last_merge)
1135 q->last_merge = NULL;
1136 return 0;
1137 }
1138 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
1139 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
1140 blk_recount_segments(q, bio);
1141 if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
1142 blk_recount_segments(q, req->bio);
1143 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
1144 !BIOVEC_VIRT_OVERSIZE(len)) {
1145 int mergeable = ll_new_mergeable(q, req, bio);
1146
1147 if (mergeable) {
1148 if (bio->bi_hw_segments == 1)
1149 bio->bi_hw_front_size = len;
1150 if (req->nr_hw_segments == 1)
1151 req->biotail->bi_hw_back_size = len;
1152 }
1153 return mergeable;
1154 }
1155
1156 return ll_new_hw_segment(q, req, bio);
1157}
1158
1159static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
1160 struct request *next)
1161{
1162 int total_phys_segments = req->nr_phys_segments +next->nr_phys_segments;
1163 int total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1164
1165
1166
1167
1168
1169 if (req->special || next->special)
1170 return 0;
1171
1172
1173
1174
1175 if ((req->nr_sectors + next->nr_sectors) > q->max_sectors)
1176 return 0;
1177
1178 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
1179 if (blk_phys_contig_segment(q, req->biotail, next->bio))
1180 total_phys_segments--;
1181
1182 if (total_phys_segments > q->max_phys_segments)
1183 return 0;
1184
1185 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
1186 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
1187 int len = req->biotail->bi_hw_back_size + next->bio->bi_hw_front_size;
1188
1189
1190
1191 if (req->nr_hw_segments == 1)
1192 req->bio->bi_hw_front_size = len;
1193 if (next->nr_hw_segments == 1)
1194 next->biotail->bi_hw_back_size = len;
1195 total_hw_segments--;
1196 }
1197
1198 if (total_hw_segments > q->max_hw_segments)
1199 return 0;
1200
1201
1202 req->nr_phys_segments = total_phys_segments;
1203 req->nr_hw_segments = total_hw_segments;
1204 return 1;
1205}
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215void blk_plug_device(request_queue_t *q)
1216{
1217 WARN_ON(!irqs_disabled());
1218
1219
1220
1221
1222
1223 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
1224 return;
1225
1226 if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1227 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
1228}
1229
1230EXPORT_SYMBOL(blk_plug_device);
1231
1232
1233
1234
1235
1236int blk_remove_plug(request_queue_t *q)
1237{
1238 WARN_ON(!irqs_disabled());
1239
1240 if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
1241 return 0;
1242
1243 del_timer(&q->unplug_timer);
1244 return 1;
1245}
1246
1247EXPORT_SYMBOL(blk_remove_plug);
1248
1249
1250
1251
1252void __generic_unplug_device(request_queue_t *q)
1253{
1254 if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
1255 return;
1256
1257 if (!blk_remove_plug(q))
1258 return;
1259
1260
1261
1262
1263 if (elv_next_request(q))
1264 q->request_fn(q);
1265}
1266EXPORT_SYMBOL(__generic_unplug_device);
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279void generic_unplug_device(request_queue_t *q)
1280{
1281 spin_lock_irq(q->queue_lock);
1282 __generic_unplug_device(q);
1283 spin_unlock_irq(q->queue_lock);
1284}
1285EXPORT_SYMBOL(generic_unplug_device);
1286
1287static void blk_backing_dev_unplug(struct backing_dev_info *bdi,
1288 struct page *page)
1289{
1290 request_queue_t *q = bdi->unplug_io_data;
1291
1292
1293
1294
1295 if (q->unplug_fn)
1296 q->unplug_fn(q);
1297}
1298
1299static void blk_unplug_work(void *data)
1300{
1301 request_queue_t *q = data;
1302
1303 q->unplug_fn(q);
1304}
1305
1306static void blk_unplug_timeout(unsigned long data)
1307{
1308 request_queue_t *q = (request_queue_t *)data;
1309
1310 kblockd_schedule_work(&q->unplug_work);
1311}
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322void blk_start_queue(request_queue_t *q)
1323{
1324 clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1325
1326
1327
1328
1329
1330 if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
1331 q->request_fn(q);
1332 clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags);
1333 } else {
1334 blk_plug_device(q);
1335 kblockd_schedule_work(&q->unplug_work);
1336 }
1337}
1338
1339EXPORT_SYMBOL(blk_start_queue);
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355void blk_stop_queue(request_queue_t *q)
1356{
1357 blk_remove_plug(q);
1358 set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
1359}
1360EXPORT_SYMBOL(blk_stop_queue);
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376void blk_sync_queue(struct request_queue *q)
1377{
1378 del_timer_sync(&q->unplug_timer);
1379 kblockd_flush();
1380}
1381EXPORT_SYMBOL(blk_sync_queue);
1382
1383
1384
1385
1386
1387void blk_run_queue(struct request_queue *q)
1388{
1389 unsigned long flags;
1390
1391 spin_lock_irqsave(q->queue_lock, flags);
1392 blk_remove_plug(q);
1393 q->request_fn(q);
1394 spin_unlock_irqrestore(q->queue_lock, flags);
1395}
1396EXPORT_SYMBOL(blk_run_queue);
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413void blk_cleanup_queue(request_queue_t * q)
1414{
1415 struct request_list *rl = &q->rq;
1416
1417 if (!atomic_dec_and_test(&q->refcnt))
1418 return;
1419
1420 if (q->elevator)
1421 elevator_exit(q->elevator);
1422
1423 blk_sync_queue(q);
1424
1425 if (rl->rq_pool)
1426 mempool_destroy(rl->rq_pool);
1427
1428 if (q->queue_tags)
1429 __blk_queue_free_tags(q);
1430
1431 kmem_cache_free(requestq_cachep, q);
1432}
1433
1434EXPORT_SYMBOL(blk_cleanup_queue);
1435
1436static int blk_init_free_list(request_queue_t *q)
1437{
1438 struct request_list *rl = &q->rq;
1439
1440 rl->count[READ] = rl->count[WRITE] = 0;
1441 rl->starved[READ] = rl->starved[WRITE] = 0;
1442 init_waitqueue_head(&rl->wait[READ]);
1443 init_waitqueue_head(&rl->wait[WRITE]);
1444 init_waitqueue_head(&rl->drain);
1445
1446 rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep);
1447
1448 if (!rl->rq_pool)
1449 return -ENOMEM;
1450
1451 return 0;
1452}
1453
/*
 * Forward declaration: blk_init_queue() installs this as the queue's
 * make_request function; the definition is outside this part of the file.
 */
static int __make_request(request_queue_t *, struct bio *);
1455
1456request_queue_t *blk_alloc_queue(int gfp_mask)
1457{
1458 request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask);
1459
1460 if (!q)
1461 return NULL;
1462
1463 memset(q, 0, sizeof(*q));
1464 init_timer(&q->unplug_timer);
1465 atomic_set(&q->refcnt, 1);
1466
1467 q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
1468 q->backing_dev_info.unplug_io_data = q;
1469
1470 return q;
1471}
1472
1473EXPORT_SYMBOL(blk_alloc_queue);
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
1507{
1508 request_queue_t *q = blk_alloc_queue(GFP_KERNEL);
1509
1510 if (!q)
1511 return NULL;
1512
1513 if (blk_init_free_list(q))
1514 goto out_init;
1515
1516 q->request_fn = rfn;
1517 q->back_merge_fn = ll_back_merge_fn;
1518 q->front_merge_fn = ll_front_merge_fn;
1519 q->merge_requests_fn = ll_merge_requests_fn;
1520 q->prep_rq_fn = NULL;
1521 q->unplug_fn = generic_unplug_device;
1522 q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
1523 q->queue_lock = lock;
1524
1525 blk_queue_segment_boundary(q, 0xffffffff);
1526
1527 blk_queue_make_request(q, __make_request);
1528 blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE);
1529
1530 blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
1531 blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
1532
1533
1534
1535
1536 if (!elevator_init(q, NULL)) {
1537 blk_queue_congestion_threshold(q);
1538 return q;
1539 }
1540
1541 blk_cleanup_queue(q);
1542out_init:
1543 kmem_cache_free(requestq_cachep, q);
1544 return NULL;
1545}
1546
1547EXPORT_SYMBOL(blk_init_queue);
1548
1549int blk_get_queue(request_queue_t *q)
1550{
1551 if (!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) {
1552 atomic_inc(&q->refcnt);
1553 return 0;
1554 }
1555
1556 return 1;
1557}
1558
1559EXPORT_SYMBOL(blk_get_queue);
1560
1561static inline void blk_free_request(request_queue_t *q, struct request *rq)
1562{
1563 elv_put_request(q, rq);
1564 mempool_free(rq, q->rq.rq_pool);
1565}
1566
1567static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
1568 int gfp_mask)
1569{
1570 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1571
1572 if (!rq)
1573 return NULL;
1574
1575
1576
1577
1578
1579 rq->flags = rw;
1580
1581 if (!elv_set_request(q, rq, gfp_mask))
1582 return rq;
1583
1584 mempool_free(rq, q->rq.rq_pool);
1585 return NULL;
1586}
1587
1588
1589
1590
1591
1592static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
1593{
1594 if (!ioc)
1595 return 0;
1596
1597
1598
1599
1600
1601
1602 return ioc->nr_batch_requests == q->nr_batching ||
1603 (ioc->nr_batch_requests > 0
1604 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
1605}
1606
1607
1608
1609
1610
1611
1612
1613void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
1614{
1615 if (!ioc || ioc_batching(q, ioc))
1616 return;
1617
1618 ioc->nr_batch_requests = q->nr_batching;
1619 ioc->last_waited = jiffies;
1620}
1621
1622static void __freed_request(request_queue_t *q, int rw)
1623{
1624 struct request_list *rl = &q->rq;
1625
1626 if (rl->count[rw] < queue_congestion_off_threshold(q))
1627 clear_queue_congested(q, rw);
1628
1629 if (rl->count[rw] + 1 <= q->nr_requests) {
1630 smp_mb();
1631 if (waitqueue_active(&rl->wait[rw]))
1632 wake_up(&rl->wait[rw]);
1633
1634 blk_clear_queue_full(q, rw);
1635 }
1636}
1637
1638
1639
1640
1641
1642static void freed_request(request_queue_t *q, int rw)
1643{
1644 struct request_list *rl = &q->rq;
1645
1646 rl->count[rw]--;
1647
1648 __freed_request(q, rw);
1649
1650 if (unlikely(rl->starved[rw ^ 1]))
1651 __freed_request(q, rw ^ 1);
1652
1653 if (!rl->count[READ] && !rl->count[WRITE]) {
1654 smp_mb();
1655 if (unlikely(waitqueue_active(&rl->drain)))
1656 wake_up(&rl->drain);
1657 }
1658}
1659
/*
 * Yield the struct request whose queuelist member is the first entry
 * after @list (i.e. the request at the head of that list).
 */
#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
1661
1662
1663
/*
 * Try to allocate a request of direction @rw on @q without sleeping in
 * this function (the mempool allocation follows @gfp_mask).
 *
 * Returns an initialised request, or NULL when the queue is draining, the
 * elevator refuses, the queue is full for a non-batching caller, or the
 * allocation itself fails.  A reference to the caller's io_context is
 * held for the duration and dropped on every exit path.
 */
static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = get_io_context(gfp_mask);

	/* no new requests while the queue is being drained */
	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
		goto out;

	spin_lock_irq(q->queue_lock);
	if (rl->count[rw]+1 >= q->nr_requests) {
		/*
		 * This allocation fills the queue.  The caller that first
		 * notices marks the queue full and becomes a batcher, which
		 * lets it keep allocating past the full check below.
		 */
		if (!blk_queue_full(q, rw)) {
			ioc_set_batching(q, ioc);
			blk_set_queue_full(q, rw);
		}
	}

	/* the elevator may veto the allocation or force it through */
	switch (elv_may_queue(q, rw)) {
		case ELV_MQUEUE_NO:
			/* jumps into the failure path below, lock still held */
			goto rq_starved;
		case ELV_MQUEUE_MAY:
			break;
		case ELV_MQUEUE_MUST:
			/* skips the queue-full check */
			goto get_rq;
	}

	if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {
		/*
		 * Queue is full and the caller is neither a batcher nor
		 * exempted by the elevator: fail the allocation.
		 */
		spin_unlock_irq(q->queue_lock);
		goto out;
	}

get_rq:
	/* account for the request before dropping the lock to allocate */
	rl->count[rw]++;
	rl->starved[rw] = 0;
	if (rl->count[rw] >= queue_congestion_on_threshold(q))
		set_queue_congested(q, rw);
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw, gfp_mask);
	if (!rq) {
		/*
		 * Allocation failed: retake the lock and undo the accounting
		 * done above.  freed_request() also performs the usual
		 * wake-ups for the released slot.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, rw);

		/*
		 * With no request of this direction outstanding, no later
		 * free of this direction would wake a waiter.  Flag the
		 * direction as starved so that freed_request() for the
		 * opposite direction runs the wake-up logic for it too.
		 */
rq_starved:
		if (unlikely(rl->count[rw] == 0))
			rl->starved[rw] = 1;

		spin_unlock_irq(q->queue_lock);
		goto out;
	}

	/* a batcher spends one unit of its allowance per request */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	INIT_LIST_HEAD(&rq->queuelist);

	/* start from a clean request; rq->flags was set by blk_alloc_request() */
	rq->errors = 0;
	rq->rq_status = RQ_ACTIVE;
	rq->bio = rq->biotail = NULL;
	rq->buffer = NULL;
	rq->ref_count = 1;
	rq->q = q;
	rq->rl = rl;
	rq->waiting = NULL;
	rq->special = NULL;
	rq->data_len = 0;
	rq->data = NULL;
	rq->sense = NULL;

out:
	put_io_context(ioc);
	return rq;
}
1761
1762
1763
1764
1765
1766static struct request *get_request_wait(request_queue_t *q, int rw)
1767{
1768 DEFINE_WAIT(wait);
1769 struct request *rq;
1770
1771 generic_unplug_device(q);
1772 do {
1773 struct request_list *rl = &q->rq;
1774
1775 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
1776 TASK_UNINTERRUPTIBLE);
1777
1778 rq = get_request(q, rw, GFP_NOIO);
1779
1780 if (!rq) {
1781 struct io_context *ioc;
1782
1783 io_schedule();
1784
1785
1786
1787
1788
1789
1790
1791 ioc = get_io_context(GFP_NOIO);
1792 ioc_set_batching(q, ioc);
1793 put_io_context(ioc);
1794 }
1795 finish_wait(&rl->wait[rw], &wait);
1796 } while (!rq);
1797
1798 return rq;
1799}
1800
1801struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
1802{
1803 struct request *rq;
1804
1805 BUG_ON(rw != READ && rw != WRITE);
1806
1807 if (gfp_mask & __GFP_WAIT)
1808 rq = get_request_wait(q, rw);
1809 else
1810 rq = get_request(q, rw, gfp_mask);
1811
1812 return rq;
1813}
1814
1815EXPORT_SYMBOL(blk_get_request);
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827void blk_requeue_request(request_queue_t *q, struct request *rq)
1828{
1829 if (blk_rq_tagged(rq))
1830 blk_queue_end_tag(q, rq);
1831
1832 elv_requeue_request(q, rq);
1833}
1834
1835EXPORT_SYMBOL(blk_requeue_request);
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857void blk_insert_request(request_queue_t *q, struct request *rq,
1858 int at_head, void *data, int reinsert)
1859{
1860 unsigned long flags;
1861
1862
1863
1864
1865
1866
1867 rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
1868
1869 rq->special = data;
1870
1871 spin_lock_irqsave(q->queue_lock, flags);
1872
1873
1874
1875
1876 if (reinsert)
1877 blk_requeue_request(q, rq);
1878 else {
1879 int where = ELEVATOR_INSERT_BACK;
1880
1881 if (at_head)
1882 where = ELEVATOR_INSERT_FRONT;
1883
1884 if (blk_rq_tagged(rq))
1885 blk_queue_end_tag(q, rq);
1886
1887 drive_stat_acct(rq, rq->nr_sectors, 1);
1888 __elv_add_request(q, rq, where, 0);
1889 }
1890 if (blk_queue_plugged(q))
1891 __generic_unplug_device(q);
1892 else
1893 q->request_fn(q);
1894 spin_unlock_irqrestore(q->queue_lock, flags);
1895}
1896
1897EXPORT_SYMBOL(blk_insert_request);
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf,
1920 unsigned int len)
1921{
1922 unsigned long uaddr;
1923 struct request *rq;
1924 struct bio *bio;
1925
1926 if (len > (q->max_sectors << 9))
1927 return ERR_PTR(-EINVAL);
1928 if ((!len && ubuf) || (len && !ubuf))
1929 return ERR_PTR(-EINVAL);
1930
1931 rq = blk_get_request(q, rw, __GFP_WAIT);
1932 if (!rq)
1933 return ERR_PTR(-ENOMEM);
1934
1935
1936
1937
1938
1939 uaddr = (unsigned long) ubuf;
1940 if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q)))
1941 bio = bio_map_user(q, NULL, uaddr, len, rw == READ);
1942 else
1943 bio = bio_copy_user(q, uaddr, len, rw == READ);
1944
1945 if (!IS_ERR(bio)) {
1946 rq->bio = rq->biotail = bio;
1947 blk_rq_bio_prep(q, rq, bio);
1948
1949 rq->buffer = rq->data = NULL;
1950 rq->data_len = len;
1951 return rq;
1952 }
1953
1954
1955
1956
1957 blk_put_request(rq);
1958 return (struct request *) bio;
1959}
1960
1961EXPORT_SYMBOL(blk_rq_map_user);
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen)
1973{
1974 int ret = 0;
1975
1976 if (bio) {
1977 if (bio_flagged(bio, BIO_USER_MAPPED))
1978 bio_unmap_user(bio);
1979 else
1980 ret = bio_uncopy_user(bio);
1981 }
1982
1983 blk_put_request(rq);
1984 return ret;
1985}
1986
1987EXPORT_SYMBOL(blk_rq_unmap_user);
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
2000 struct request *rq)
2001{
2002 DECLARE_COMPLETION(wait);
2003 char sense[SCSI_SENSE_BUFFERSIZE];
2004 int err = 0;
2005
2006 rq->rq_disk = bd_disk;
2007
2008
2009
2010
2011
2012 rq->ref_count++;
2013
2014 if (!rq->sense) {
2015 memset(sense, 0, sizeof(sense));
2016 rq->sense = sense;
2017 rq->sense_len = 0;
2018 }
2019
2020 rq->flags |= REQ_NOMERGE;
2021 if (!rq->waiting)
2022 rq->waiting = &wait;
2023 elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
2024 generic_unplug_device(q);
2025 wait_for_completion(rq->waiting);
2026 rq->waiting = NULL;
2027
2028 if (rq->errors)
2029 err = -EIO;
2030
2031 return err;
2032}
2033
2034EXPORT_SYMBOL(blk_execute_rq);
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
2047{
2048 request_queue_t *q;
2049
2050 if (bdev->bd_disk == NULL)
2051 return -ENXIO;
2052
2053 q = bdev_get_queue(bdev);
2054 if (!q)
2055 return -ENXIO;
2056 if (!q->issue_flush_fn)
2057 return -EOPNOTSUPP;
2058
2059 return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
2060}
2061
2062EXPORT_SYMBOL(blkdev_issue_flush);
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
2077 sector_t *error_sector)
2078{
2079 struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
2080 int ret;
2081
2082 rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
2083 rq->sector = 0;
2084 memset(rq->cmd, 0, sizeof(rq->cmd));
2085 rq->cmd[0] = 0x35;
2086 rq->cmd_len = 12;
2087 rq->data = NULL;
2088 rq->data_len = 0;
2089 rq->timeout = 60 * HZ;
2090
2091 ret = blk_execute_rq(q, disk, rq);
2092
2093 if (ret && error_sector)
2094 *error_sector = rq->sector;
2095
2096 blk_put_request(rq);
2097 return ret;
2098}
2099
2100EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn);
2101
2102void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
2103{
2104 int rw = rq_data_dir(rq);
2105
2106 if (!blk_fs_request(rq) || !rq->rq_disk)
2107 return;
2108
2109 if (rw == READ) {
2110 __disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
2111 if (!new_io)
2112 __disk_stat_inc(rq->rq_disk, read_merges);
2113 } else if (rw == WRITE) {
2114 __disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
2115 if (!new_io)
2116 __disk_stat_inc(rq->rq_disk, write_merges);
2117 }
2118 if (new_io) {
2119 disk_round_stats(rq->rq_disk);
2120 rq->rq_disk->in_flight++;
2121 }
2122}
2123
2124
2125
2126
2127
2128
2129static inline void add_request(request_queue_t * q, struct request * req)
2130{
2131 drive_stat_acct(req, req->nr_sectors, 1);
2132
2133 if (q->activity_fn)
2134 q->activity_fn(q->activity_data, rq_data_dir(req));
2135
2136
2137
2138
2139
2140 __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
2141}
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158void disk_round_stats(struct gendisk *disk)
2159{
2160 unsigned long now = jiffies;
2161
2162 __disk_stat_add(disk, time_in_queue,
2163 disk->in_flight * (now - disk->stamp));
2164 disk->stamp = now;
2165
2166 if (disk->in_flight)
2167 __disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
2168 disk->stamp_idle = now;
2169}
2170
2171
2172
2173
2174void __blk_put_request(request_queue_t *q, struct request *req)
2175{
2176 struct request_list *rl = req->rl;
2177
2178 if (unlikely(!q))
2179 return;
2180 if (unlikely(--req->ref_count))
2181 return;
2182
2183 req->rq_status = RQ_INACTIVE;
2184 req->q = NULL;
2185 req->rl = NULL;
2186
2187
2188
2189
2190
2191 if (rl) {
2192 int rw = rq_data_dir(req);
2193
2194 elv_completed_request(q, req);
2195
2196 BUG_ON(!list_empty(&req->queuelist));
2197
2198 blk_free_request(q, req);
2199 freed_request(q, rw);
2200 }
2201}
2202
2203void blk_put_request(struct request *req)
2204{
2205
2206
2207
2208
2209 if (req->rl) {
2210 unsigned long flags;
2211 request_queue_t *q = req->q;
2212
2213 spin_lock_irqsave(q->queue_lock, flags);
2214 __blk_put_request(q, req);
2215 spin_unlock_irqrestore(q->queue_lock, flags);
2216 }
2217}
2218
2219EXPORT_SYMBOL(blk_put_request);
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230long blk_congestion_wait(int rw, long timeout)
2231{
2232 long ret;
2233 DEFINE_WAIT(wait);
2234 wait_queue_head_t *wqh = &congestion_wqh[rw];
2235
2236 prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
2237 ret = io_schedule_timeout(timeout);
2238 finish_wait(wqh, &wait);
2239 return ret;
2240}
2241
2242EXPORT_SYMBOL(blk_congestion_wait);
2243
2244
2245
2246
2247static int attempt_merge(request_queue_t *q, struct request *req,
2248 struct request *next)
2249{
2250 if (!rq_mergeable(req) || !rq_mergeable(next))
2251 return 0;
2252
2253
2254
2255
2256 if (req->sector + req->nr_sectors != next->sector)
2257 return 0;
2258
2259 if (rq_data_dir(req) != rq_data_dir(next)
2260 || req->rq_disk != next->rq_disk
2261 || next->waiting || next->special)
2262 return 0;
2263
2264
2265
2266
2267
2268
2269
2270 if (!q->merge_requests_fn(q, req, next))
2271 return 0;
2272
2273
2274
2275
2276
2277
2278
2279 if (time_after(req->start_time, next->start_time))
2280 req->start_time = next->start_time;
2281
2282 req->biotail->bi_next = next->bio;
2283 req->biotail = next->biotail;
2284
2285 req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
2286
2287 elv_merge_requests(q, req, next);
2288
2289 if (req->rq_disk) {
2290 disk_round_stats(req->rq_disk);
2291 req->rq_disk->in_flight--;
2292 }
2293
2294 __blk_put_request(q, next);
2295 return 1;
2296}
2297
2298static inline int attempt_back_merge(request_queue_t *q, struct request *rq)
2299{
2300 struct request *next = elv_latter_request(q, rq);
2301
2302 if (next)
2303 return attempt_merge(q, rq, next);
2304
2305 return 0;
2306}
2307
2308static inline int attempt_front_merge(request_queue_t *q, struct request *rq)
2309{
2310 struct request *prev = elv_former_request(q, rq);
2311
2312 if (prev)
2313 return attempt_merge(q, prev, rq);
2314
2315 return 0;
2316}
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331void blk_attempt_remerge(request_queue_t *q, struct request *rq)
2332{
2333 unsigned long flags;
2334
2335 spin_lock_irqsave(q->queue_lock, flags);
2336 attempt_back_merge(q, rq);
2337 spin_unlock_irqrestore(q->queue_lock, flags);
2338}
2339
2340EXPORT_SYMBOL(blk_attempt_remerge);
2341
2342
2343
2344
2345void __blk_attempt_remerge(request_queue_t *q, struct request *rq)
2346{
2347 attempt_back_merge(q, rq);
2348}
2349
2350EXPORT_SYMBOL(__blk_attempt_remerge);
2351
/*
 * Turn @bio into work on queue @q: either merge it into an existing
 * request or place it in a freshly allocated one.
 *
 * Always returns 0.  Failures are reported by completing the bio with an
 * error instead: -EOPNOTSUPP for a barrier bio on a queue without
 * QUEUE_FLAG_ORDERED, -EWOULDBLOCK for a read-ahead bio when no request
 * is immediately available.
 */
static int __make_request(request_queue_t *q, struct bio *bio)
{
	struct request *req, *freereq = NULL;
	int el_ret, rw, nr_sectors, cur_nr_sectors, barrier, err;
	sector_t sector;

	/* sample the bio geometry up front */
	sector = bio->bi_sector;
	nr_sectors = bio_sectors(bio);
	cur_nr_sectors = bio_cur_sectors(bio);

	rw = bio_data_dir(bio);

	/*
	 * bio is passed by address, so blk_queue_bounce() may substitute a
	 * different bio; everything below uses whatever it left in place.
	 */
	blk_queue_bounce(q, &bio);

	spin_lock_prefetch(q->queue_lock);

	barrier = bio_barrier(bio);
	if (barrier && !(q->queue_flags & (1 << QUEUE_FLAG_ORDERED))) {
		err = -EOPNOTSUPP;
		goto end_io;
	}

	/*
	 * Re-entered after a request has been allocated with the lock
	 * dropped: the merge decision is taken again from scratch.
	 */
again:
	spin_lock_irq(q->queue_lock);

	/* nothing to merge with on an empty queue: plug it and allocate */
	if (elv_queue_empty(q)) {
		blk_plug_device(q);
		goto get_rq;
	}
	/* barrier bios are never merged */
	if (barrier)
		goto get_rq;

	el_ret = elv_merge(q, &req, bio);
	switch (el_ret) {
		case ELEVATOR_BACK_MERGE:
			BUG_ON(!rq_mergeable(req));

			if (!q->back_merge_fn(q, req, bio))
				break;

			/* append the bio to the tail of the request */
			req->biotail->bi_next = bio;
			req->biotail = bio;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			drive_stat_acct(req, nr_sectors, 0);
			/* the grown request may now touch its successor */
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req);
			goto out;

		case ELEVATOR_FRONT_MERGE:
			BUG_ON(!rq_mergeable(req));

			if (!q->front_merge_fn(q, req, bio))
				break;

			/* prepend the bio to the request */
			bio->bi_next = req->bio;
			req->bio = bio;

			/*
			 * The request now starts at the new bio, so its
			 * buffer, current-segment counts and start sector
			 * all have to follow.
			 */
			req->buffer = bio_data(bio);
			req->current_nr_sectors = cur_nr_sectors;
			req->hard_cur_sectors = cur_nr_sectors;
			req->sector = req->hard_sector = sector;
			req->nr_sectors = req->hard_nr_sectors += nr_sectors;
			drive_stat_acct(req, nr_sectors, 0);
			/* the grown request may now touch its predecessor */
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req);
			goto out;

		/* no merge possible: fall out of the switch and allocate */
		case ELEVATOR_NO_MERGE:
			break;

		default:
			printk("elevator returned crap (%d)\n", el_ret);
			BUG();
	}

	/*
	 * Need a new request.  If one was allocated on a previous pass, use
	 * it; otherwise drop the lock, allocate, and start over at "again".
	 */
get_rq:
	if (freereq) {
		req = freereq;
		freereq = NULL;
	} else {
		spin_unlock_irq(q->queue_lock);
		if ((freereq = get_request(q, rw, GFP_ATOMIC)) == NULL) {
			/*
			 * Read-ahead is not worth sleeping for: fail it.
			 */
			err = -EWOULDBLOCK;
			if (bio_rw_ahead(bio))
				goto end_io;

			freereq = get_request_wait(q, rw);
		}
		goto again;
	}

	req->flags |= REQ_CMD;

	/* read-ahead and explicit fail-fast bios both get REQ_FAILFAST */
	if (bio_rw_ahead(bio) || bio_failfast(bio))
		req->flags |= REQ_FAILFAST;

	/* a barrier request is a hard barrier and must never be merged */
	if (barrier)
		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);

	req->errors = 0;
	req->hard_sector = req->sector = sector;
	req->hard_nr_sectors = req->nr_sectors = nr_sectors;
	req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
	req->nr_phys_segments = bio_phys_segments(q, bio);
	req->nr_hw_segments = bio_hw_segments(q, bio);
	req->buffer = bio_data(bio);
	req->waiting = NULL;
	req->bio = req->biotail = bio;
	req->rq_disk = bio->bi_bdev->bd_disk;
	req->start_time = jiffies;

	add_request(q, req);
out:
	/* a preallocated request that was not needed (the bio got merged) */
	if (freereq)
		__blk_put_request(q, freereq);
	/* sync bios unplug the queue immediately */
	if (bio_sync(bio))
		__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
	return 0;

	/* reached without the queue lock held */
end_io:
	bio_endio(bio, nr_sectors << 9, err);
	return 0;
}
2504
2505
2506
2507
2508static inline void blk_partition_remap(struct bio *bio)
2509{
2510 struct block_device *bdev = bio->bi_bdev;
2511
2512 if (bdev != bdev->bd_contains) {
2513 struct hd_struct *p = bdev->bd_part;
2514
2515 switch (bio->bi_rw) {
2516 case READ:
2517 p->read_sectors += bio_sectors(bio);
2518 p->reads++;
2519 break;
2520 case WRITE:
2521 p->write_sectors += bio_sectors(bio);
2522 p->writes++;
2523 break;
2524 }
2525 bio->bi_sector += p->start_sect;
2526 bio->bi_bdev = bdev->bd_contains;
2527 }
2528}
2529
2530void blk_finish_queue_drain(request_queue_t *q)
2531{
2532 struct request_list *rl = &q->rq;
2533 struct request *rq;
2534
2535 spin_lock_irq(q->queue_lock);
2536 clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2537
2538 while (!list_empty(&q->drain_list)) {
2539 rq = list_entry_rq(q->drain_list.next);
2540
2541 list_del_init(&rq->queuelist);
2542 __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
2543 }
2544
2545 spin_unlock_irq(q->queue_lock);
2546
2547 wake_up(&rl->wait[0]);
2548 wake_up(&rl->wait[1]);
2549 wake_up(&rl->drain);
2550}
2551
2552static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
2553{
2554 int wait = rl->count[READ] + rl->count[WRITE];
2555
2556 if (dispatch)
2557 wait += !list_empty(&q->queue_head);
2558
2559 return wait;
2560}
2561
2562
2563
2564
2565
2566
2567
2568void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
2569{
2570 struct request_list *rl = &q->rq;
2571 DEFINE_WAIT(wait);
2572
2573 spin_lock_irq(q->queue_lock);
2574 set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
2575
2576 while (wait_drain(q, rl, wait_dispatch)) {
2577 prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
2578
2579 if (wait_drain(q, rl, wait_dispatch)) {
2580 __generic_unplug_device(q);
2581 spin_unlock_irq(q->queue_lock);
2582 io_schedule();
2583 spin_lock_irq(q->queue_lock);
2584 }
2585
2586 finish_wait(&rl->drain, &wait);
2587 }
2588
2589 spin_unlock_irq(q->queue_lock);
2590}
2591
2592
2593
2594
2595static inline void block_wait_queue_running(request_queue_t *q)
2596{
2597 DEFINE_WAIT(wait);
2598
2599 while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
2600 struct request_list *rl = &q->rq;
2601
2602 prepare_to_wait_exclusive(&rl->drain, &wait,
2603 TASK_UNINTERRUPTIBLE);
2604
2605
2606
2607
2608
2609 if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
2610 io_schedule();
2611
2612 finish_wait(&rl->drain, &wait);
2613 }
2614}
2615
2616static void handle_bad_sector(struct bio *bio)
2617{
2618 char b[BDEVNAME_SIZE];
2619
2620 printk(KERN_INFO "attempt to access beyond end of device\n");
2621 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
2622 bdevname(bio->bi_bdev, b),
2623 bio->bi_rw,
2624 (unsigned long long)bio->bi_sector + bio_sectors(bio),
2625 (long long)(bio->bi_bdev->bd_inode->i_size >> 9));
2626
2627 set_bit(BIO_EOF, &bio->bi_flags);
2628}
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
/*
 * Hand @bio to the make_request_fn of the queue that backs bio->bi_bdev.
 *
 * There is no return value; every failure is reported by completing the
 * bio with -EIO.  That happens when the bio reaches beyond the end of
 * the device, the device has no queue, the bio is larger than
 * q->max_hw_sectors, or the queue is marked dead.
 *
 * May sleep (might_sleep(), and waits while the queue is draining).
 */
void generic_make_request(struct bio *bio)
{
	request_queue_t *q;
	sector_t maxsector;
	int ret, nr_sectors = bio_sectors(bio);

	might_sleep();
	/* bounds check; a device size of zero sectors disables it */
	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
	if (maxsector) {
		sector_t sector = bio->bi_sector;

		/* written as a subtraction so that the check cannot overflow */
		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
			/*
			 * The bio extends past the end of the device: log it,
			 * flag the bio and fail it.
			 */
			handle_bad_sector(bio);
			goto end_io;
		}
	}

	/*
	 * Resolve the queue and submit.  The loop repeats for as long as
	 * make_request_fn returns non-zero; bio->bi_bdev is re-read on every
	 * pass, so a make_request_fn that redirected the bio gets it
	 * resubmitted to the new device's queue.
	 */
	do {
		char b[BDEVNAME_SIZE];

		q = bdev_get_queue(bio->bi_bdev);
		if (!q) {
			printk(KERN_ERR
			       "generic_make_request: Trying to access "
				"nonexistent block-device %s (%Lu)\n",
				bdevname(bio->bi_bdev, b),
				(long long) bio->bi_sector);
			/* common failure exit; also entered by goto from outside this block */
end_io:
			bio_endio(bio, bio->bi_size, -EIO);
			break;
		}

		if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
			printk("bio too big device %s (%u > %u)\n", 
				bdevname(bio->bi_bdev, b),
				bio_sectors(bio),
				q->max_hw_sectors);
			goto end_io;
		}

		if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))
			goto end_io;

		/* sleep here while the queue is in drain mode */
		block_wait_queue_running(q);

		/*
		 * Partition-relative bios are converted to whole-device
		 * sectors before the queue sees them.
		 */
		blk_partition_remap(bio);

		ret = q->make_request_fn(q, bio);
	} while (ret);
}
2722
2723EXPORT_SYMBOL(generic_make_request);
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735void submit_bio(int rw, struct bio *bio)
2736{
2737 int count = bio_sectors(bio);
2738
2739 BIO_BUG_ON(!bio->bi_size);
2740 BIO_BUG_ON(!bio->bi_io_vec);
2741 bio->bi_rw = rw;
2742 if (rw & WRITE)
2743 mod_page_state(pgpgout, count);
2744 else
2745 mod_page_state(pgpgin, count);
2746
2747 if (unlikely(block_dump)) {
2748 char b[BDEVNAME_SIZE];
2749 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
2750 current->comm, current->pid,
2751 (rw & WRITE) ? "WRITE" : "READ",
2752 (unsigned long long)bio->bi_sector,
2753 bdevname(bio->bi_bdev,b));
2754 }
2755
2756 generic_make_request(bio);
2757}
2758
2759EXPORT_SYMBOL(submit_bio);
2760
2761void blk_recalc_rq_segments(struct request *rq)
2762{
2763 struct bio *bio, *prevbio = NULL;
2764 int nr_phys_segs, nr_hw_segs;
2765 unsigned int phys_size, hw_size;
2766 request_queue_t *q = rq->q;
2767
2768 if (!rq->bio)
2769 return;
2770
2771 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
2772 rq_for_each_bio(bio, rq) {
2773
2774 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
2775
2776 nr_phys_segs += bio_phys_segments(q, bio);
2777 nr_hw_segs += bio_hw_segments(q, bio);
2778 if (prevbio) {
2779 int pseg = phys_size + prevbio->bi_size + bio->bi_size;
2780 int hseg = hw_size + prevbio->bi_size + bio->bi_size;
2781
2782 if (blk_phys_contig_segment(q, prevbio, bio) &&
2783 pseg <= q->max_segment_size) {
2784 nr_phys_segs--;
2785 phys_size += prevbio->bi_size + bio->bi_size;
2786 } else
2787 phys_size = 0;
2788
2789 if (blk_hw_contig_segment(q, prevbio, bio) &&
2790 hseg <= q->max_segment_size) {
2791 nr_hw_segs--;
2792 hw_size += prevbio->bi_size + bio->bi_size;
2793 } else
2794 hw_size = 0;
2795 }
2796 prevbio = bio;
2797 }
2798
2799 rq->nr_phys_segments = nr_phys_segs;
2800 rq->nr_hw_segments = nr_hw_segs;
2801}
2802
2803void blk_recalc_rq_sectors(struct request *rq, int nsect)
2804{
2805 if (blk_fs_request(rq)) {
2806 rq->hard_sector += nsect;
2807 rq->hard_nr_sectors -= nsect;
2808
2809
2810
2811
2812 if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
2813 (rq->sector <= rq->hard_sector)) {
2814 rq->sector = rq->hard_sector;
2815 rq->nr_sectors = rq->hard_nr_sectors;
2816 rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
2817 rq->current_nr_sectors = rq->hard_cur_sectors;
2818 rq->buffer = bio_data(rq->bio);
2819 }
2820
2821
2822
2823
2824
2825 if (rq->nr_sectors < rq->current_nr_sectors) {
2826 printk("blk: request botched\n");
2827 rq->nr_sectors = rq->current_nr_sectors;
2828 }
2829 }
2830}
2831
/*
 * Complete up to @nr_bytes of @req, bio by bio.
 *
 * @req:      request being (partially) completed
 * @uptodate: completion status; 0 is turned into -EIO, and any value for
 *            which end_io_error() is true is passed through as the error
 * @nr_bytes: number of bytes that have finished
 *
 * Returns 0 when no bios are left on the request, 1 when bios remain.
 */
static int __end_that_request_first(struct request *req, int uptodate,
				    int nr_bytes)
{
	int total_bytes, bio_nbytes, error, next_idx = 0;
	struct bio *bio;

	/*
	 * Derive the error handed to bio_endio(): 0 unless end_io_error()
	 * flags this status.
	 */
	error = 0;
	if (end_io_error(uptodate))
		error = !uptodate ? -EIO : uptodate;

	/*
	 * Every request type except packet-command requests gets its error
	 * counter reset here.
	 */
	if (!blk_pc_request(req))
		req->errors = 0;

	if (!uptodate) {
		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
			printk("end_request: I/O error, dev %s, sector %llu\n",
				req->rq_disk ? req->rq_disk->disk_name : "?",
				(unsigned long long)req->sector);
	}

	/*
	 * total_bytes: bytes completed across all bios in this call.
	 * bio_nbytes:  bytes completed inside the bio that is left partially
	 *              done; next_idx counts its fully completed vectors.
	 */
	total_bytes = bio_nbytes = 0;
	while ((bio = req->bio) != NULL) {
		int nbytes;

		if (nr_bytes >= bio->bi_size) {
			/* the whole bio is done: unlink and complete it */
			req->bio = bio->bi_next;
			nbytes = bio->bi_size;
			bio_endio(bio, nbytes, error);
			next_idx = 0;
			bio_nbytes = 0;
		} else {
			/* partial bio: walk it one io vector at a time */
			int idx = bio->bi_idx + next_idx;

			if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
				blk_dump_rq_flags(req, "__end_that");
				printk("%s: bio idx %d >= vcnt %d\n",
						__FUNCTION__,
						bio->bi_idx, bio->bi_vcnt);
				break;
			}

			nbytes = bio_iovec_idx(bio, idx)->bv_len;
			BIO_BUG_ON(nbytes > bio->bi_size);

			/*
			 * Fewer bytes left than this vector holds: account
			 * what is left and stop; the vector itself is
			 * adjusted after the loop.
			 */
			if (unlikely(nbytes > nr_bytes)) {
				bio_nbytes += nr_bytes;
				total_bytes += nr_bytes;
				break;
			}

			/*
			 * This vector is fully done; move on to the next one.
			 */
			next_idx++;
			bio_nbytes += nbytes;
		}

		total_bytes += nbytes;
		nr_bytes -= nbytes;

		if ((bio = req->bio)) {
			/*
			 * Bios remain but the completed byte count is used up.
			 */
			if (unlikely(nr_bytes <= 0))
				break;
		}
	}

	/*
	 * No bios left: the request is completely done.
	 */
	if (!req->bio)
		return 0;

	/*
	 * A bio was left partially completed: report the finished part and
	 * advance its index and current vector past the completed bytes.
	 */
	if (bio_nbytes) {
		bio_endio(bio, bio_nbytes, error);
		bio->bi_idx += next_idx;
		bio_iovec(bio)->bv_offset += nr_bytes;
		bio_iovec(bio)->bv_len -= nr_bytes;
	}

	/* bring the request's sector and segment counts up to date */
	blk_recalc_rq_sectors(req, total_bytes >> 9);
	blk_recalc_rq_segments(req);
	return 1;
}
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
/*
 * Complete @nr_sectors sectors of @req.  Sector-granular front end to
 * __end_that_request_first(): the count is converted to bytes
 * (sectors << 9).
 *
 * Returns 0 when the request has no bios left, 1 when bios remain.
 */
int end_that_request_first(struct request *req, int uptodate, int nr_sectors)
{
	int nr_bytes = nr_sectors << 9;

	return __end_that_request_first(req, uptodate, nr_bytes);
}
2950
2951EXPORT_SYMBOL(end_that_request_first);
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
/*
 * Complete @nr_bytes bytes of @req.  Byte-granular front end to
 * __end_that_request_first().
 *
 * Returns 0 when the request has no bios left, 1 when bios remain.
 */
int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)
{
	int more_pending = __end_that_request_first(req, uptodate, nr_bytes);

	return more_pending;
}
2972
2973EXPORT_SYMBOL(end_that_request_chunk);
2974
2975
2976
2977
2978void end_that_request_last(struct request *req)
2979{
2980 struct gendisk *disk = req->rq_disk;
2981 struct completion *waiting = req->waiting;
2982
2983 if (unlikely(laptop_mode) && blk_fs_request(req))
2984 laptop_io_completion();
2985
2986 if (disk && blk_fs_request(req)) {
2987 unsigned long duration = jiffies - req->start_time;
2988 switch (rq_data_dir(req)) {
2989 case WRITE:
2990 __disk_stat_inc(disk, writes);
2991 __disk_stat_add(disk, write_ticks, duration);
2992 break;
2993 case READ:
2994 __disk_stat_inc(disk, reads);
2995 __disk_stat_add(disk, read_ticks, duration);
2996 break;
2997 }
2998 disk_round_stats(disk);
2999 disk->in_flight--;
3000 }
3001 __blk_put_request(req->q, req);
3002
3003 if (waiting)
3004 complete(waiting);
3005}
3006
3007EXPORT_SYMBOL(end_that_request_last);
3008
3009void end_request(struct request *req, int uptodate)
3010{
3011 if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
3012 add_disk_randomness(req->rq_disk);
3013 blkdev_dequeue_request(req);
3014 end_that_request_last(req);
3015 }
3016}
3017
3018EXPORT_SYMBOL(end_request);
3019
3020void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
3021{
3022
3023 rq->flags |= (bio->bi_rw & 7);
3024
3025 rq->nr_phys_segments = bio_phys_segments(q, bio);
3026 rq->nr_hw_segments = bio_hw_segments(q, bio);
3027 rq->current_nr_sectors = bio_cur_sectors(bio);
3028 rq->hard_cur_sectors = rq->current_nr_sectors;
3029 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
3030 rq->buffer = bio_data(bio);
3031
3032 rq->bio = rq->biotail = bio;
3033}
3034
3035EXPORT_SYMBOL(blk_rq_bio_prep);
3036
3037int kblockd_schedule_work(struct work_struct *work)
3038{
3039 return queue_work(kblockd_workqueue, work);
3040}
3041
3042EXPORT_SYMBOL(kblockd_schedule_work);
3043
3044void kblockd_flush(void)
3045{
3046 flush_workqueue(kblockd_workqueue);
3047}
3048EXPORT_SYMBOL(kblockd_flush);
3049
3050int __init blk_dev_init(void)
3051{
3052 kblockd_workqueue = create_workqueue("kblockd");
3053 if (!kblockd_workqueue)
3054 panic("Failed to create kblockd\n");
3055
3056 request_cachep = kmem_cache_create("blkdev_requests",
3057 sizeof(struct request), 0, SLAB_PANIC, NULL, NULL);
3058
3059 requestq_cachep = kmem_cache_create("blkdev_queue",
3060 sizeof(request_queue_t), 0, SLAB_PANIC, NULL, NULL);
3061
3062 iocontext_cachep = kmem_cache_create("blkdev_ioc",
3063 sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
3064
3065 blk_max_low_pfn = max_low_pfn;
3066 blk_max_pfn = max_pfn;
3067
3068 return 0;
3069}
3070
3071
3072
3073
3074void put_io_context(struct io_context *ioc)
3075{
3076 if (ioc == NULL)
3077 return;
3078
3079 BUG_ON(atomic_read(&ioc->refcount) == 0);
3080
3081 if (atomic_dec_and_test(&ioc->refcount)) {
3082 if (ioc->aic && ioc->aic->dtor)
3083 ioc->aic->dtor(ioc->aic);
3084 if (ioc->cic && ioc->cic->dtor)
3085 ioc->cic->dtor(ioc->cic);
3086
3087 kmem_cache_free(iocontext_cachep, ioc);
3088 }
3089}
3090EXPORT_SYMBOL(put_io_context);
3091
3092
3093void exit_io_context(void)
3094{
3095 unsigned long flags;
3096 struct io_context *ioc;
3097
3098 local_irq_save(flags);
3099 ioc = current->io_context;
3100 current->io_context = NULL;
3101 local_irq_restore(flags);
3102
3103 if (ioc->aic && ioc->aic->exit)
3104 ioc->aic->exit(ioc->aic);
3105 if (ioc->cic && ioc->cic->exit)
3106 ioc->cic->exit(ioc->cic);
3107
3108 put_io_context(ioc);
3109}
3110
3111
3112
3113
3114
3115
3116
3117
3118
/*
 * Return the current task's io_context with an extra reference held,
 * allocating and attaching a new one if the task has none yet.
 *
 * @gfp_flags: allocation flags used when a new context must be created
 *
 * Returns NULL only when a new context was needed and the allocation
 * failed.  The caller must drop the reference with put_io_context().
 */
struct io_context *get_io_context(int gfp_flags)
{
	struct task_struct *tsk = current;
	unsigned long flags;
	struct io_context *ret;

	/* fast path: the task already has a context; just take a reference */
	local_irq_save(flags);
	ret = tsk->io_context;
	if (ret)
		goto out;

	/* interrupts are re-enabled around the allocation */
	local_irq_restore(flags);

	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
	if (ret) {
		/* this first reference is the one owned by the task itself */
		atomic_set(&ret->refcount, 1);
		ret->pid = tsk->pid;
		ret->last_waited = jiffies; /* doesn't matter... */
		ret->nr_batch_requests = 0; /* because this is 0 */
		ret->aic = NULL;
		ret->cic = NULL;
		spin_lock_init(&ret->lock);

		local_irq_save(flags);

		/*
		 * Interrupts were enabled during the allocation, so the task
		 * may have been given an io_context in the meantime.  If so,
		 * discard the new one and use the one that is installed.
		 */
		if (!tsk->io_context)
			tsk->io_context = ret;
		else {
			kmem_cache_free(iocontext_cachep, ret);
			ret = tsk->io_context;
		}

		/*
		 * Also the target of the fast path above, which jumps here
		 * with interrupts disabled and ret non-NULL.  Takes the
		 * caller's reference.
		 */
out:
		atomic_inc(&ret->refcount);
		local_irq_restore(flags);
	}

	return ret;
}
3162EXPORT_SYMBOL(get_io_context);
3163
3164void copy_io_context(struct io_context **pdst, struct io_context **psrc)
3165{
3166 struct io_context *src = *psrc;
3167 struct io_context *dst = *pdst;
3168
3169 if (src) {
3170 BUG_ON(atomic_read(&src->refcount) == 0);
3171 atomic_inc(&src->refcount);
3172 put_io_context(dst);
3173 *pdst = src;
3174 }
3175}
3176EXPORT_SYMBOL(copy_io_context);
3177
/*
 * Exchange the io_context pointers stored in *@ioc1 and *@ioc2.
 * Reference counts are not touched.
 */
void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
	struct io_context *first = *ioc1;
	struct io_context *second = *ioc2;

	*ioc1 = second;
	*ioc2 = first;
}
3185EXPORT_SYMBOL(swap_io_context);
3186
3187
3188
3189
/*
 * One sysfs attribute of a request queue: the generic attribute (name
 * and mode) plus the handlers that format and parse its value.
 */
struct queue_sysfs_entry {
	struct attribute attr;
	/* writes the current value into the given buffer; returns its length */
	ssize_t (*show)(struct request_queue *, char *);
	/* parses a new value from the given buffer of the given length */
	ssize_t (*store)(struct request_queue *, const char *, size_t);
};
3195
/*
 * Format @var as a decimal number followed by a newline into @page.
 *
 * @var is unsigned, so it is printed with "%u"; "%d" would show values
 * above INT_MAX as negative numbers.
 *
 * Returns the number of characters written (excluding the NUL).
 */
static ssize_t
queue_var_show(unsigned int var, char *page)
{
	return sprintf(page, "%u\n", var);
}
3201
3202static ssize_t
3203queue_var_store(unsigned long *var, const char *page, size_t count)
3204{
3205 char *p = (char *) page;
3206
3207 *var = simple_strtoul(p, &p, 10);
3208 return count;
3209}
3210
3211static ssize_t queue_requests_show(struct request_queue *q, char *page)
3212{
3213 return queue_var_show(q->nr_requests, (page));
3214}
3215
3216static ssize_t
3217queue_requests_store(struct request_queue *q, const char *page, size_t count)
3218{
3219 struct request_list *rl = &q->rq;
3220
3221 int ret = queue_var_store(&q->nr_requests, page, count);
3222 if (q->nr_requests < BLKDEV_MIN_RQ)
3223 q->nr_requests = BLKDEV_MIN_RQ;
3224 blk_queue_congestion_threshold(q);
3225
3226 if (rl->count[READ] >= queue_congestion_on_threshold(q))
3227 set_queue_congested(q, READ);
3228 else if (rl->count[READ] < queue_congestion_off_threshold(q))
3229 clear_queue_congested(q, READ);
3230
3231 if (rl->count[WRITE] >= queue_congestion_on_threshold(q))
3232 set_queue_congested(q, WRITE);
3233 else if (rl->count[WRITE] < queue_congestion_off_threshold(q))
3234 clear_queue_congested(q, WRITE);
3235
3236 if (rl->count[READ] >= q->nr_requests) {
3237 blk_set_queue_full(q, READ);
3238 } else if (rl->count[READ]+1 <= q->nr_requests) {
3239 blk_clear_queue_full(q, READ);
3240 wake_up(&rl->wait[READ]);
3241 }
3242
3243 if (rl->count[WRITE] >= q->nr_requests) {
3244 blk_set_queue_full(q, WRITE);
3245 } else if (rl->count[WRITE]+1 <= q->nr_requests) {
3246 blk_clear_queue_full(q, WRITE);
3247 wake_up(&rl->wait[WRITE]);
3248 }
3249 return ret;
3250}
3251
3252static ssize_t queue_ra_show(struct request_queue *q, char *page)
3253{
3254 int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3255
3256 return queue_var_show(ra_kb, (page));
3257}
3258
3259static ssize_t
3260queue_ra_store(struct request_queue *q, const char *page, size_t count)
3261{
3262 unsigned long ra_kb;
3263 ssize_t ret = queue_var_store(&ra_kb, page, count);
3264
3265 spin_lock_irq(q->queue_lock);
3266 if (ra_kb > (q->max_sectors >> 1))
3267 ra_kb = (q->max_sectors >> 1);
3268
3269 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
3270 spin_unlock_irq(q->queue_lock);
3271
3272 return ret;
3273}
3274
3275static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
3276{
3277 int max_sectors_kb = q->max_sectors >> 1;
3278
3279 return queue_var_show(max_sectors_kb, (page));
3280}
3281
3282static ssize_t
3283queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
3284{
3285 unsigned long max_sectors_kb,
3286 max_hw_sectors_kb = q->max_hw_sectors >> 1,
3287 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
3288 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
3289 int ra_kb;
3290
3291 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
3292 return -EINVAL;
3293
3294
3295
3296
3297 spin_lock_irq(q->queue_lock);
3298
3299
3300
3301 ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
3302 if (ra_kb > max_sectors_kb)
3303 q->backing_dev_info.ra_pages =
3304 max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
3305
3306 q->max_sectors = max_sectors_kb << 1;
3307 spin_unlock_irq(q->queue_lock);
3308
3309 return ret;
3310}
3311
3312static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
3313{
3314 int max_hw_sectors_kb = q->max_hw_sectors >> 1;
3315
3316 return queue_var_show(max_hw_sectors_kb, (page));
3317}
3318
3319
3320static struct queue_sysfs_entry queue_requests_entry = {
3321 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
3322 .show = queue_requests_show,
3323 .store = queue_requests_store,
3324};
3325
3326static struct queue_sysfs_entry queue_ra_entry = {
3327 .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
3328 .show = queue_ra_show,
3329 .store = queue_ra_store,
3330};
3331
3332static struct queue_sysfs_entry queue_max_sectors_entry = {
3333 .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
3334 .show = queue_max_sectors_show,
3335 .store = queue_max_sectors_store,
3336};
3337
3338static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
3339 .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
3340 .show = queue_max_hw_sectors_show,
3341};
3342
3343static struct queue_sysfs_entry queue_iosched_entry = {
3344 .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
3345 .show = elv_iosched_show,
3346 .store = elv_iosched_store,
3347};
3348
3349static struct attribute *default_attrs[] = {
3350 &queue_requests_entry.attr,
3351 &queue_ra_entry.attr,
3352 &queue_max_hw_sectors_entry.attr,
3353 &queue_max_sectors_entry.attr,
3354 &queue_iosched_entry.attr,
3355 NULL,
3356};
3357
/* Map a pointer to an embedded struct attribute back to its queue_sysfs_entry. */
#define to_queue(a) container_of((a), struct queue_sysfs_entry, attr)
3359
3360static ssize_t
3361queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
3362{
3363 struct queue_sysfs_entry *entry = to_queue(attr);
3364 struct request_queue *q;
3365
3366 q = container_of(kobj, struct request_queue, kobj);
3367 if (!entry->show)
3368 return 0;
3369
3370 return entry->show(q, page);
3371}
3372
3373static ssize_t
3374queue_attr_store(struct kobject *kobj, struct attribute *attr,
3375 const char *page, size_t length)
3376{
3377 struct queue_sysfs_entry *entry = to_queue(attr);
3378 struct request_queue *q;
3379
3380 q = container_of(kobj, struct request_queue, kobj);
3381 if (!entry->store)
3382 return -EINVAL;
3383
3384 return entry->store(q, page, length);
3385}
3386
3387static struct sysfs_ops queue_sysfs_ops = {
3388 .show = queue_attr_show,
3389 .store = queue_attr_store,
3390};
3391
3392struct kobj_type queue_ktype = {
3393 .sysfs_ops = &queue_sysfs_ops,
3394 .default_attrs = default_attrs,
3395};
3396
3397int blk_register_queue(struct gendisk *disk)
3398{
3399 int ret;
3400
3401 request_queue_t *q = disk->queue;
3402
3403 if (!q || !q->request_fn)
3404 return -ENXIO;
3405
3406 q->kobj.parent = kobject_get(&disk->kobj);
3407 if (!q->kobj.parent)
3408 return -EBUSY;
3409
3410 snprintf(q->kobj.name, KOBJ_NAME_LEN, "%s", "queue");
3411 q->kobj.ktype = &queue_ktype;
3412
3413 ret = kobject_register(&q->kobj);
3414 if (ret < 0)
3415 return ret;
3416
3417 ret = elv_register_queue(q);
3418 if (ret) {
3419 kobject_unregister(&q->kobj);
3420 return ret;
3421 }
3422
3423 return 0;
3424}
3425
3426void blk_unregister_queue(struct gendisk *disk)
3427{
3428 request_queue_t *q = disk->queue;
3429
3430 if (q && q->request_fn) {
3431 elv_unregister_queue(q);
3432
3433 kobject_unregister(&q->kobj);
3434 kobject_put(&disk->kobj);
3435 }
3436}
3437