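/*
 * Block layer core: request queue allocation and teardown, request
 * allocation and accounting, bio submission via generic_make_request()
 * and submit_bio(), plugging, and request completion.
 */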

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/fault-inject.h>
#include <linux/list_sort.h>
#include <linux/delay.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

/* ida used to assign request_queue ids (q->id) */
DEFINE_IDA(blk_queue_ida);

/* slab cache for struct request allocations */
static struct kmem_cache *request_cachep;

/* slab cache for struct request_queue allocations */
struct kmem_cache *blk_requestq_cachep;

/* kblockd workqueue, used for delayed and asynchronous queue runs */
static struct workqueue_struct *kblockd_workqueue;

static void drive_stat_acct(struct request *rq, int new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_do_io_stat(rq))
		return;

	cpu = part_stat_lock();

	if (!new_io) {
		part = rq->part;
		part_stat_inc(cpu, part, merges[rw]);
	} else {
		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
		if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed;
			 * account the I/O against the whole disk
			 * (part0) instead.
			 */
			part = &rq->rq_disk->part0;
			hd_struct_get(part);
		}
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
		rq->part = part;
	}

	part_stat_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
{
	int nr;

	nr = q->nr_requests - (q->nr_requests / 8) + 1;
	if (nr > q->nr_requests)
		nr = q->nr_requests;
	q->nr_congestion_on = nr;

	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
	if (nr < 1)
		nr = 1;
	q->nr_congestion_off = nr;
}

/**
 * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
 * @bdev:	device
 *
 * Locates the passed device's request queue and returns the address of its
 * backing_dev_info.  Returns %NULL if the device has no queue.
 */
struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
	struct backing_dev_info *ret = NULL;
	struct request_queue *q = bdev_get_queue(bdev);

	if (q)
		ret = &q->backing_dev_info;
	return ret;
}
EXPORT_SYMBOL(blk_get_backing_dev_info);

void blk_rq_init(struct request_queue *q, struct request *rq)
{
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->timeout_list);
	rq->cpu = -1;
	rq->q = q;
	rq->__sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
	RB_CLEAR_NODE(&rq->rb_node);
	rq->cmd = rq->__cmd;
	rq->cmd_len = BLK_MAX_CDB;
	rq->tag = -1;
	rq->ref_count = 1;
	rq->start_time = jiffies;
	set_start_time_ns(rq);
	rq->part = NULL;
}
EXPORT_SYMBOL(blk_rq_init);

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = -EIO;

	if (unlikely(nbytes > bio->bi_size)) {
		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
		       __func__, nbytes, bio->bi_size);
		nbytes = bio->bi_size;
	}

	if (unlikely(rq->cmd_flags & REQ_QUIET))
		set_bit(BIO_QUIET, &bio->bi_flags);

	bio->bi_size -= nbytes;
	bio->bi_sector += (nbytes >> 9);

	if (bio_integrity(bio))
		bio_integrity_advance(bio, nbytes);

	/* don't actually finish the bio if it's part of a flush sequence */
	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
		bio_endio(bio, error);
}

void blk_dump_rq_flags(struct request *rq, char *msg)
{
	int bit;

	printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
		rq->cmd_flags);

	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
	       (unsigned long long)blk_rq_pos(rq),
	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		printk(KERN_INFO "  cdb: ");
		for (bit = 0; bit < BLK_MAX_CDB; bit++)
			printk("%02x ", rq->cmd[bit]);
		printk("\n");
	}
}
EXPORT_SYMBOL(blk_dump_rq_flags);

static void blk_delay_work(struct work_struct *work)
{
	struct request_queue *q;

	q = container_of(work, struct request_queue, delay_work.work);
	spin_lock_irq(q->queue_lock);
	__blk_run_queue(q);
	spin_unlock_irq(q->queue_lock);
}

/**
 * blk_delay_queue - restart queueing after defined interval
 * @q:		the &struct request_queue in question
 * @msecs:	delay in msecs
 *
 * Description:
 *   Schedule a delayed kblockd work item that will run the queue again
 *   after approximately @msecs milliseconds.
 */
void blk_delay_queue(struct request_queue *q, unsigned long msecs)
{
	queue_delayed_work(kblockd_workqueue, &q->delay_work,
				msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_delay_queue);

/**
 * blk_start_queue - restart a previously stopped queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   Clear the stopped flag and run the queue.  Must be called with the
 *   queue lock held and interrupts disabled.  See also blk_stop_queue().
 */
void blk_start_queue(struct request_queue *q)
{
	WARN_ON(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
	__blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);

/**
 * blk_stop_queue - stop a queue
 * @q:    The &struct request_queue in question
 *
 * Description:
 *   Mark the queue stopped so that its request_fn will not be invoked, and
 *   cancel any pending delayed run.  A driver typically calls this when its
 *   hardware cannot accept more requests, and blk_start_queue() when it is
 *   ready to go again.  Must be called with the queue lock held.
 */
void blk_stop_queue(struct request_queue *q)
{
	__cancel_delayed_work(&q->delay_work);
	queue_flag_set(QUEUE_FLAG_STOPPED, q);
}
EXPORT_SYMBOL(blk_stop_queue);

/**
 * blk_sync_queue - cancel any pending callbacks on a queue
 * @q: the queue
 *
 * Description:
 *     Cancel the queue's timeout timer and any pending delayed queue run,
 *     and wait for them to finish.
 */
void blk_sync_queue(struct request_queue *q)
{
	del_timer_sync(&q->timeout);
	cancel_delayed_work_sync(&q->delay_work);
}
EXPORT_SYMBOL(blk_sync_queue);

/**
 * __blk_run_queue - run a single device queue
 * @q:	The queue to run
 *
 * Description:
 *    Invoke the queue's request_fn directly, unless the queue is stopped.
 *    Must be called with the queue lock held and interrupts disabled.
 */
void __blk_run_queue(struct request_queue *q)
{
	if (unlikely(blk_queue_stopped(q)))
		return;

	q->request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);

/**
 * blk_run_queue_async - run a single device queue in workqueue context
 * @q:	The queue to run
 *
 * Description:
 *    Schedule the queue to be run from kblockd context instead of running
 *    it directly, unless the queue is stopped.
 */
void blk_run_queue_async(struct request_queue *q)
{
	if (likely(!blk_queue_stopped(q))) {
		__cancel_delayed_work(&q->delay_work);
		queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
	}
}
EXPORT_SYMBOL(blk_run_queue_async);

/**
 * blk_run_queue - run a single device queue
 * @q: The queue to run
 *
 * Description:
 *    Grab the queue lock, run the queue, and release the lock again.
 */
void blk_run_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__blk_run_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);

void blk_put_queue(struct request_queue *q)
{
	kobject_put(&q->kobj);
}
EXPORT_SYMBOL(blk_put_queue);

/**
 * blk_drain_queue - drain requests from request_queue
 * @q: queue to drain
 * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV
 *
 * Drain requests from @q.  If @drain_all is set, all requests are drained.
 * If not, only ELVPRIV requests are drained.  The caller is responsible
 * for ensuring that no new requests which need to be drained are queued.
 */
void blk_drain_queue(struct request_queue *q, bool drain_all)
{
	while (true) {
		bool drain = false;
		int i;

		spin_lock_irq(q->queue_lock);

		elv_drain_elevator(q);
		if (drain_all)
			blk_throtl_drain(q);

		/*
		 * This function might be called on a queue which failed
		 * driver init after queue creation.  Some drivers
		 * (e.g. fd) get unhappy in such cases.  Kick the queue
		 * only if the dispatch queue has something on it.
		 */
		if (!list_empty(&q->queue_head))
			__blk_run_queue(q);

		drain |= q->rq.elvpriv;

		/*
		 * Unfortunately, requests are queued at and tracked from
		 * multiple places and there's no single counter which can
		 * be drained.  Check all the queues and counters.
		 */
		if (drain_all) {
			drain |= !list_empty(&q->queue_head);
			for (i = 0; i < 2; i++) {
				drain |= q->rq.count[i];
				drain |= q->in_flight[i];
				drain |= !list_empty(&q->flush_queue[i]);
			}
		}

		spin_unlock_irq(q->queue_lock);

		if (!drain)
			break;
		msleep(10);
	}
}

/**
 * blk_cleanup_queue - shutdown a request queue
 * @q: request queue to shutdown
 *
 * Mark @q DEAD, drain all pending requests, shut it down and put it.
 */
void blk_cleanup_queue(struct request_queue *q)
{
	spinlock_t *lock = q->queue_lock;

	/* mark @q DEAD, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);

	spin_lock_irq(lock);
	queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	queue_flag_set(QUEUE_FLAG_DEAD, q);

	if (q->queue_lock != &q->__queue_lock)
		q->queue_lock = &q->__queue_lock;

	spin_unlock_irq(lock);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DEAD marking.  The caller might
	 * be trying to tear down @q before its elevator is initialized, in
	 * which case we don't want to call into draining.
	 */
	if (q->elevator)
		blk_drain_queue(q, true);

	/* @q won't process any more requests, flush async actions */
	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
	blk_sync_queue(q);

	/* @q is and will stay empty, shutdown and put */
	blk_put_queue(q);
}
EXPORT_SYMBOL(blk_cleanup_queue);
447
448static int blk_init_free_list(struct request_queue *q)
449{
450 struct request_list *rl = &q->rq;
451
452 if (unlikely(rl->rq_pool))
453 return 0;
454
455 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
456 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
457 rl->elvpriv = 0;
458 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
459 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
460
461 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
462 mempool_free_slab, request_cachep, q->node);
463
464 if (!rl->rq_pool)
465 return -ENOMEM;
466
467 return 0;
468}
469
470struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
471{
472 return blk_alloc_queue_node(gfp_mask, -1);
473}
474EXPORT_SYMBOL(blk_alloc_queue);
475
476struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
477{
478 struct request_queue *q;
479 int err;
480
481 q = kmem_cache_alloc_node(blk_requestq_cachep,
482 gfp_mask | __GFP_ZERO, node_id);
483 if (!q)
484 return NULL;
485
486 q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL);
487 if (q->id < 0)
488 goto fail_q;
489
490 q->backing_dev_info.ra_pages =
491 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
492 q->backing_dev_info.state = 0;
493 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
494 q->backing_dev_info.name = "block";
495 q->node = node_id;
496
497 err = bdi_init(&q->backing_dev_info);
498 if (err)
499 goto fail_id;
500
501 if (blk_throtl_init(q))
502 goto fail_id;
503
504 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
505 laptop_mode_timer_fn, (unsigned long) q);
506 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
507 INIT_LIST_HEAD(&q->timeout_list);
508 INIT_LIST_HEAD(&q->icq_list);
509 INIT_LIST_HEAD(&q->flush_queue[0]);
510 INIT_LIST_HEAD(&q->flush_queue[1]);
511 INIT_LIST_HEAD(&q->flush_data_in_flight);
512 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
513
514 kobject_init(&q->kobj, &blk_queue_ktype);
515
516 mutex_init(&q->sysfs_lock);
517 spin_lock_init(&q->__queue_lock);
518
519
520
521
522
523 q->queue_lock = &q->__queue_lock;
524
525 return q;
526
527fail_id:
528 ida_simple_remove(&blk_queue_ida, q->id);
529fail_q:
530 kmem_cache_free(blk_requestq_cachep, q);
531 return NULL;
532}
533EXPORT_SYMBOL(blk_alloc_queue_node);
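
/**
 * blk_init_queue - prepare a request queue for use with a block device
 * @rfn:  The function to be called to process requests that have been
 *        placed on the queue.
 * @lock: Request queue spin lock
 *
 * Allocates a request queue with blk_alloc_queue_node() and sets it up
 * for use with a request_fn based driver.  Returns a pointer to the
 * initialized queue, or %NULL on failure.  The caller must eventually
 * release the queue with blk_cleanup_queue().
 */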
568struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
569{
570 return blk_init_queue_node(rfn, lock, -1);
571}
572EXPORT_SYMBOL(blk_init_queue);
573
574struct request_queue *
575blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
576{
577 struct request_queue *uninit_q, *q;
578
579 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
580 if (!uninit_q)
581 return NULL;
582
583 q = blk_init_allocated_queue(uninit_q, rfn, lock);
584 if (!q)
585 blk_cleanup_queue(uninit_q);
586
587 return q;
588}
589EXPORT_SYMBOL(blk_init_queue_node);
590
591struct request_queue *
592blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
593 spinlock_t *lock)
594{
595 if (!q)
596 return NULL;
597
598 if (blk_init_free_list(q))
599 return NULL;
600
601 q->request_fn = rfn;
602 q->prep_rq_fn = NULL;
603 q->unprep_rq_fn = NULL;
604 q->queue_flags = QUEUE_FLAG_DEFAULT;
605
606
607 if (lock)
608 q->queue_lock = lock;
609
610
611
612
613 blk_queue_make_request(q, blk_queue_bio);
614
615 q->sg_reserved_size = INT_MAX;
616
617
618
619
620 if (!elevator_init(q, NULL)) {
621 blk_queue_congestion_threshold(q);
622 return q;
623 }
624
625 return NULL;
626}
627EXPORT_SYMBOL(blk_init_allocated_queue);
628
629bool blk_get_queue(struct request_queue *q)
630{
631 if (likely(!blk_queue_dead(q))) {
632 __blk_get_queue(q);
633 return true;
634 }
635
636 return false;
637}
638EXPORT_SYMBOL(blk_get_queue);
639
640static inline void blk_free_request(struct request_queue *q, struct request *rq)
641{
642 if (rq->cmd_flags & REQ_ELVPRIV) {
643 elv_put_request(q, rq);
644 if (rq->elv.icq)
645 put_io_context(rq->elv.icq->ioc);
646 }
647
648 mempool_free(rq, q->rq.rq_pool);
649}
650
651static struct request *
652blk_alloc_request(struct request_queue *q, struct io_cq *icq,
653 unsigned int flags, gfp_t gfp_mask)
654{
655 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
656
657 if (!rq)
658 return NULL;
659
660 blk_rq_init(q, rq);
661
662 rq->cmd_flags = flags | REQ_ALLOCED;
663
664 if (flags & REQ_ELVPRIV) {
665 rq->elv.icq = icq;
666 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
667 mempool_free(rq, q->rq.rq_pool);
668 return NULL;
669 }
670
671 if (icq)
672 get_io_context(icq->ioc);
673 }
674
675 return rq;
676}
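
/*
 * ioc_batching returns true if @ioc may allocate requests beyond the normal
 * limits: it was just granted its full batch quota, or it still has quota
 * left and the batching window (BLK_BATCH_TIME) has not yet expired.
 */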
682static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
683{
684 if (!ioc)
685 return 0;
686
687
688
689
690
691
692 return ioc->nr_batch_requests == q->nr_batching ||
693 (ioc->nr_batch_requests > 0
694 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
695}
696
697
698
699
700
701
702
703static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
704{
705 if (!ioc || ioc_batching(q, ioc))
706 return;
707
708 ioc->nr_batch_requests = q->nr_batching;
709 ioc->last_waited = jiffies;
710}
711
712static void __freed_request(struct request_queue *q, int sync)
713{
714 struct request_list *rl = &q->rq;
715
716 if (rl->count[sync] < queue_congestion_off_threshold(q))
717 blk_clear_queue_congested(q, sync);
718
719 if (rl->count[sync] + 1 <= q->nr_requests) {
720 if (waitqueue_active(&rl->wait[sync]))
721 wake_up(&rl->wait[sync]);
722
723 blk_clear_queue_full(q, sync);
724 }
725}
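
/*
 * A request has just been released.  Decrement the count for its sync
 * class, let __freed_request() update the congestion/full state and wake
 * any waiters, and kick the other sync class if it was starved.
 * Called with the queue lock held.
 */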
731static void freed_request(struct request_queue *q, unsigned int flags)
732{
733 struct request_list *rl = &q->rq;
734 int sync = rw_is_sync(flags);
735
736 rl->count[sync]--;
737 if (flags & REQ_ELVPRIV)
738 rl->elvpriv--;
739
740 __freed_request(q, sync);
741
742 if (unlikely(rl->starved[sync ^ 1]))
743 __freed_request(q, sync ^ 1);
744}
745
746
747
748
749
750static bool blk_rq_should_init_elevator(struct bio *bio)
751{
752 if (!bio)
753 return true;
754
755
756
757
758
759 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
760 return false;
761
762 return true;
763}
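
/**
 * get_request - get a free request
 * @q: request_queue to allocate the request from
 * @rw_flags: RW and SYNC flags
 * @bio: bio to allocate the request for (can be %NULL)
 * @gfp_mask: allocation mask
 *
 * Get a free request from @q.  This function may fail under memory
 * pressure or if @q is dead.
 *
 * Must be called with @q->queue_lock held.  Returns %NULL on failure with
 * the lock still held; returns the new request on success with the lock
 * released.
 */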
779static struct request *get_request(struct request_queue *q, int rw_flags,
780 struct bio *bio, gfp_t gfp_mask)
781{
782 struct request *rq = NULL;
783 struct request_list *rl = &q->rq;
784 struct elevator_type *et;
785 struct io_context *ioc;
786 struct io_cq *icq = NULL;
787 const bool is_sync = rw_is_sync(rw_flags) != 0;
788 bool retried = false;
789 int may_queue;
790retry:
791 et = q->elevator->type;
792 ioc = current->io_context;
793
794 if (unlikely(blk_queue_dead(q)))
795 return NULL;
796
797 may_queue = elv_may_queue(q, rw_flags);
798 if (may_queue == ELV_MQUEUE_NO)
799 goto rq_starved;
800
801 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
802 if (rl->count[is_sync]+1 >= q->nr_requests) {
803
804
805
806
807
808
809 if (!ioc && !retried) {
810 spin_unlock_irq(q->queue_lock);
811 create_io_context(current, gfp_mask, q->node);
812 spin_lock_irq(q->queue_lock);
813 retried = true;
814 goto retry;
815 }
816
817
818
819
820
821
822
823 if (!blk_queue_full(q, is_sync)) {
824 ioc_set_batching(q, ioc);
825 blk_set_queue_full(q, is_sync);
826 } else {
827 if (may_queue != ELV_MQUEUE_MUST
828 && !ioc_batching(q, ioc)) {
829
830
831
832
833
834 goto out;
835 }
836 }
837 }
838 blk_set_queue_congested(q, is_sync);
839 }
840
841
842
843
844
845
846 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
847 goto out;
848
849 rl->count[is_sync]++;
850 rl->starved[is_sync] = 0;
851
852
853
854
855
856
857
858
859
860
861
862 if (blk_rq_should_init_elevator(bio) &&
863 !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
864 rw_flags |= REQ_ELVPRIV;
865 rl->elvpriv++;
866 if (et->icq_cache && ioc)
867 icq = ioc_lookup_icq(ioc, q);
868 }
869
870 if (blk_queue_io_stat(q))
871 rw_flags |= REQ_IO_STAT;
872 spin_unlock_irq(q->queue_lock);
873
874
875 if ((rw_flags & REQ_ELVPRIV) && unlikely(et->icq_cache && !icq)) {
876 icq = ioc_create_icq(q, gfp_mask);
877 if (!icq)
878 goto fail_icq;
879 }
880
881 rq = blk_alloc_request(q, icq, rw_flags, gfp_mask);
882
883fail_icq:
884 if (unlikely(!rq)) {
885
886
887
888
889
890
891
892 spin_lock_irq(q->queue_lock);
893 freed_request(q, rw_flags);
894
895
896
897
898
899
900
901
902rq_starved:
903 if (unlikely(rl->count[is_sync] == 0))
904 rl->starved[is_sync] = 1;
905
906 goto out;
907 }
908
909
910
911
912
913
914
915 if (ioc_batching(q, ioc))
916 ioc->nr_batch_requests--;
917
918 trace_block_getrq(q, bio, rw_flags & 1);
919out:
920 return rq;
921}
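
/**
 * get_request_wait - get a free request, sleeping if necessary
 * @q: request_queue to allocate the request from
 * @rw_flags: RW and SYNC flags
 * @bio: bio to allocate the request for (can be %NULL)
 *
 * Like get_request(), but sleeps on the request list until a request
 * becomes available or @q dies.  The same locking rules as get_request()
 * apply to the return value.
 */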
936static struct request *get_request_wait(struct request_queue *q, int rw_flags,
937 struct bio *bio)
938{
939 const bool is_sync = rw_is_sync(rw_flags) != 0;
940 struct request *rq;
941
942 rq = get_request(q, rw_flags, bio, GFP_NOIO);
943 while (!rq) {
944 DEFINE_WAIT(wait);
945 struct request_list *rl = &q->rq;
946
947 if (unlikely(blk_queue_dead(q)))
948 return NULL;
949
950 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
951 TASK_UNINTERRUPTIBLE);
952
953 trace_block_sleeprq(q, bio, rw_flags & 1);
954
955 spin_unlock_irq(q->queue_lock);
956 io_schedule();
957
958
959
960
961
962
963
964 create_io_context(current, GFP_NOIO, q->node);
965 ioc_set_batching(q, current->io_context);
966
967 spin_lock_irq(q->queue_lock);
968 finish_wait(&rl->wait[is_sync], &wait);
969
		rq = get_request(q, rw_flags, bio, GFP_NOIO);
	}
972
973 return rq;
974}
975
976struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
977{
978 struct request *rq;
979
980 BUG_ON(rw != READ && rw != WRITE);
981
982 spin_lock_irq(q->queue_lock);
983 if (gfp_mask & __GFP_WAIT)
984 rq = get_request_wait(q, rw, NULL);
985 else
986 rq = get_request(q, rw, NULL, gfp_mask);
987 if (!rq)
988 spin_unlock_irq(q->queue_lock);
989
990
991 return rq;
992}
993EXPORT_SYMBOL(blk_get_request);
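
/**
 * blk_make_request - allocate a request corresponding to a bio chain
 * @q: target request queue
 * @bio: bio (possibly chained) describing the memory to be submitted
 * @gfp_mask: gfp flags to be used for memory allocation
 *
 * Allocates a request and appends the given bio chain to it, bouncing
 * pages as needed.  Returns the request on success or an ERR_PTR() value
 * on failure.
 */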
1026struct request *blk_make_request(struct request_queue *q, struct bio *bio,
1027 gfp_t gfp_mask)
1028{
1029 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
1030
1031 if (unlikely(!rq))
1032 return ERR_PTR(-ENOMEM);
1033
1034 for_each_bio(bio) {
1035 struct bio *bounce_bio = bio;
1036 int ret;
1037
1038 blk_queue_bounce(q, &bounce_bio);
1039 ret = blk_rq_append_bio(q, rq, bounce_bio);
1040 if (unlikely(ret)) {
1041 blk_put_request(rq);
1042 return ERR_PTR(ret);
1043 }
1044 }
1045
1046 return rq;
1047}
1048EXPORT_SYMBOL(blk_make_request);
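
/**
 * blk_requeue_request - put a request back on the queue
 * @q:		request queue where the request should be inserted
 * @rq:		request to be inserted
 *
 * Description:
 *    Drivers often keep queueing requests until the hardware cannot accept
 *    more; when that happens the request must be put back on the queue.
 *    Must be called with the queue lock held.
 */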
1060void blk_requeue_request(struct request_queue *q, struct request *rq)
1061{
1062 blk_delete_timer(rq);
1063 blk_clear_rq_complete(rq);
1064 trace_block_rq_requeue(q, rq);
1065
1066 if (blk_rq_tagged(rq))
1067 blk_queue_end_tag(q, rq);
1068
1069 BUG_ON(blk_queued_rq(rq));
1070
1071 elv_requeue_request(q, rq);
1072}
1073EXPORT_SYMBOL(blk_requeue_request);
1074
1075static void add_acct_request(struct request_queue *q, struct request *rq,
1076 int where)
1077{
1078 drive_stat_acct(rq, 1);
1079 __elv_add_request(q, rq, where);
1080}
1081
1082static void part_round_stats_single(int cpu, struct hd_struct *part,
1083 unsigned long now)
1084{
1085 if (now == part->stamp)
1086 return;
1087
1088 if (part_in_flight(part)) {
1089 __part_stat_add(cpu, part, time_in_queue,
1090 part_in_flight(part) * (now - part->stamp));
1091 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1092 }
1093 part->stamp = now;
1094}
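
/**
 * part_round_stats - round off in-flight time accounting for a partition
 * @cpu: cpu number for stats access
 * @part: target partition
 *
 * Brings time_in_queue and io_ticks for @part (and for the whole disk when
 * @part is a partition) up to date with the current jiffy, based on the
 * number of requests currently in flight.
 */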
1112void part_round_stats(int cpu, struct hd_struct *part)
1113{
1114 unsigned long now = jiffies;
1115
1116 if (part->partno)
1117 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1118 part_round_stats_single(cpu, part, now);
1119}
1120EXPORT_SYMBOL_GPL(part_round_stats);
1121
1122
1123
1124
1125void __blk_put_request(struct request_queue *q, struct request *req)
1126{
1127 if (unlikely(!q))
1128 return;
1129 if (unlikely(--req->ref_count))
1130 return;
1131
1132 elv_completed_request(q, req);
1133
1134
1135 WARN_ON(req->bio != NULL);
1136
1137
1138
1139
1140
1141 if (req->cmd_flags & REQ_ALLOCED) {
1142 unsigned int flags = req->cmd_flags;
1143
1144 BUG_ON(!list_empty(&req->queuelist));
1145 BUG_ON(!hlist_unhashed(&req->hash));
1146
1147 blk_free_request(q, req);
1148 freed_request(q, flags);
1149 }
1150}
1151EXPORT_SYMBOL_GPL(__blk_put_request);
1152
1153void blk_put_request(struct request *req)
1154{
1155 unsigned long flags;
1156 struct request_queue *q = req->q;
1157
1158 spin_lock_irqsave(q->queue_lock, flags);
1159 __blk_put_request(q, req);
1160 spin_unlock_irqrestore(q->queue_lock, flags);
1161}
1162EXPORT_SYMBOL(blk_put_request);
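
/**
 * blk_add_request_payload - add a payload to a request
 * @rq: request to update
 * @page: page backing the payload
 * @len: length of the payload
 *
 * Rewrites the request's (single) bio to point at @page with length @len
 * and updates the request's data length accordingly.  The caller remains
 * responsible for the lifetime of @page.
 */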
1177void blk_add_request_payload(struct request *rq, struct page *page,
1178 unsigned int len)
1179{
1180 struct bio *bio = rq->bio;
1181
1182 bio->bi_io_vec->bv_page = page;
1183 bio->bi_io_vec->bv_offset = 0;
1184 bio->bi_io_vec->bv_len = len;
1185
1186 bio->bi_size = len;
1187 bio->bi_vcnt = 1;
1188 bio->bi_phys_segments = 1;
1189
1190 rq->__data_len = rq->resid_len = len;
1191 rq->nr_phys_segments = 1;
1192 rq->buffer = bio_data(bio);
1193}
1194EXPORT_SYMBOL_GPL(blk_add_request_payload);
1195
1196static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1197 struct bio *bio)
1198{
1199 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1200
1201 if (!ll_back_merge_fn(q, req, bio))
1202 return false;
1203
1204 trace_block_bio_backmerge(q, bio);
1205
1206 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1207 blk_rq_set_mixed_merge(req);
1208
1209 req->biotail->bi_next = bio;
1210 req->biotail = bio;
1211 req->__data_len += bio->bi_size;
1212 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1213
1214 drive_stat_acct(req, 0);
1215 return true;
1216}
1217
1218static bool bio_attempt_front_merge(struct request_queue *q,
1219 struct request *req, struct bio *bio)
1220{
1221 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1222
1223 if (!ll_front_merge_fn(q, req, bio))
1224 return false;
1225
1226 trace_block_bio_frontmerge(q, bio);
1227
1228 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1229 blk_rq_set_mixed_merge(req);
1230
1231 bio->bi_next = req->bio;
1232 req->bio = bio;
1233
1234
1235
1236
1237
1238
1239 req->buffer = bio_data(bio);
1240 req->__sector = bio->bi_sector;
1241 req->__data_len += bio->bi_size;
1242 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1243
1244 drive_stat_acct(req, 0);
1245 return true;
1246}
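
/*
 * attempt_plug_merge - try to merge @bio into a request already queued on
 * the current task's plug list.  Returns true on a successful merge.  As a
 * side effect, *@request_count is set to the number of requests found on
 * the plug list; the caller uses it to decide when to flush the plug.
 */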
1265static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
1266 unsigned int *request_count)
1267{
1268 struct blk_plug *plug;
1269 struct request *rq;
1270 bool ret = false;
1271
1272 plug = current->plug;
1273 if (!plug)
1274 goto out;
1275 *request_count = 0;
1276
1277 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1278 int el_ret;
1279
1280 (*request_count)++;
1281
1282 if (rq->q != q || !blk_rq_merge_ok(rq, bio))
1283 continue;
1284
1285 el_ret = blk_try_merge(rq, bio);
1286 if (el_ret == ELEVATOR_BACK_MERGE) {
1287 ret = bio_attempt_back_merge(q, rq, bio);
1288 if (ret)
1289 break;
1290 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1291 ret = bio_attempt_front_merge(q, rq, bio);
1292 if (ret)
1293 break;
1294 }
1295 }
1296out:
1297 return ret;
1298}
1299
1300void init_request_from_bio(struct request *req, struct bio *bio)
1301{
1302 req->cmd_type = REQ_TYPE_FS;
1303
1304 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1305 if (bio->bi_rw & REQ_RAHEAD)
1306 req->cmd_flags |= REQ_FAILFAST_MASK;
1307
1308 req->errors = 0;
1309 req->__sector = bio->bi_sector;
1310 req->ioprio = bio_prio(bio);
1311 blk_rq_bio_prep(req->q, req, bio);
1312}
1313
1314void blk_queue_bio(struct request_queue *q, struct bio *bio)
1315{
1316 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1317 struct blk_plug *plug;
1318 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1319 struct request *req;
1320 unsigned int request_count = 0;
1321
1322
1323
1324
1325
1326
1327 blk_queue_bounce(q, &bio);
1328
1329 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1330 spin_lock_irq(q->queue_lock);
1331 where = ELEVATOR_INSERT_FLUSH;
1332 goto get_rq;
1333 }
1334
1335
1336
1337
1338
1339 if (attempt_plug_merge(q, bio, &request_count))
1340 return;
1341
1342 spin_lock_irq(q->queue_lock);
1343
1344 el_ret = elv_merge(q, &req, bio);
1345 if (el_ret == ELEVATOR_BACK_MERGE) {
1346 if (bio_attempt_back_merge(q, req, bio)) {
1347 elv_bio_merged(q, req, bio);
1348 if (!attempt_back_merge(q, req))
1349 elv_merged_request(q, req, el_ret);
1350 goto out_unlock;
1351 }
1352 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1353 if (bio_attempt_front_merge(q, req, bio)) {
1354 elv_bio_merged(q, req, bio);
1355 if (!attempt_front_merge(q, req))
1356 elv_merged_request(q, req, el_ret);
1357 goto out_unlock;
1358 }
1359 }
1360
1361get_rq:
1362
1363
1364
1365
1366
1367 rw_flags = bio_data_dir(bio);
1368 if (sync)
1369 rw_flags |= REQ_SYNC;
1370
1371
1372
1373
1374
1375 req = get_request_wait(q, rw_flags, bio);
1376 if (unlikely(!req)) {
1377 bio_endio(bio, -ENODEV);
1378 goto out_unlock;
1379 }
1380
1381
1382
1383
1384
1385
1386
1387 init_request_from_bio(req, bio);
1388
1389 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
1390 req->cpu = raw_smp_processor_id();
1391
1392 plug = current->plug;
1393 if (plug) {
1394
1395
1396
1397
1398
1399
1400 if (list_empty(&plug->list))
1401 trace_block_plug(q);
1402 else {
1403 if (!plug->should_sort) {
1404 struct request *__rq;
1405
1406 __rq = list_entry_rq(plug->list.prev);
1407 if (__rq->q != q)
1408 plug->should_sort = 1;
1409 }
1410 if (request_count >= BLK_MAX_REQUEST_COUNT) {
1411 blk_flush_plug_list(plug, false);
1412 trace_block_plug(q);
1413 }
1414 }
1415 list_add_tail(&req->queuelist, &plug->list);
1416 drive_stat_acct(req, 1);
1417 } else {
1418 spin_lock_irq(q->queue_lock);
1419 add_acct_request(q, req, where);
1420 __blk_run_queue(q);
1421out_unlock:
1422 spin_unlock_irq(q->queue_lock);
1423 }
1424}
1425EXPORT_SYMBOL_GPL(blk_queue_bio);
1426
1427
1428
1429
1430static inline void blk_partition_remap(struct bio *bio)
1431{
1432 struct block_device *bdev = bio->bi_bdev;
1433
1434 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1435 struct hd_struct *p = bdev->bd_part;
1436
1437 bio->bi_sector += p->start_sect;
1438 bio->bi_bdev = bdev->bd_contains;
1439
1440 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1441 bdev->bd_dev,
1442 bio->bi_sector - p->start_sect);
1443 }
1444}
1445
1446static void handle_bad_sector(struct bio *bio)
1447{
1448 char b[BDEVNAME_SIZE];
1449
1450 printk(KERN_INFO "attempt to access beyond end of device\n");
1451 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1452 bdevname(bio->bi_bdev, b),
1453 bio->bi_rw,
1454 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1455 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1456
1457 set_bit(BIO_EOF, &bio->bi_flags);
1458}
1459
1460#ifdef CONFIG_FAIL_MAKE_REQUEST
1461
1462static DECLARE_FAULT_ATTR(fail_make_request);
1463
1464static int __init setup_fail_make_request(char *str)
1465{
1466 return setup_fault_attr(&fail_make_request, str);
1467}
1468__setup("fail_make_request=", setup_fail_make_request);
1469
1470static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
1471{
1472 return part->make_it_fail && should_fail(&fail_make_request, bytes);
1473}
1474
1475static int __init fail_make_request_debugfs(void)
1476{
1477 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
1478 NULL, &fail_make_request);
1479
1480 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
1481}
1482
1483late_initcall(fail_make_request_debugfs);
1484
1485#else
1486
1487static inline bool should_fail_request(struct hd_struct *part,
1488 unsigned int bytes)
1489{
1490 return false;
1491}
1492
1493#endif
1494
1495
1496
1497
1498static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1499{
1500 sector_t maxsector;
1501
1502 if (!nr_sectors)
1503 return 0;
1504
1505
1506 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1507 if (maxsector) {
1508 sector_t sector = bio->bi_sector;
1509
1510 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1511
1512
1513
1514
1515
1516 handle_bad_sector(bio);
1517 return 1;
1518 }
1519 }
1520
1521 return 0;
1522}
1523
1524static noinline_for_stack bool
1525generic_make_request_checks(struct bio *bio)
1526{
1527 struct request_queue *q;
1528 int nr_sectors = bio_sectors(bio);
1529 int err = -EIO;
1530 char b[BDEVNAME_SIZE];
1531 struct hd_struct *part;
1532
1533 might_sleep();
1534
1535 if (bio_check_eod(bio, nr_sectors))
1536 goto end_io;
1537
1538 q = bdev_get_queue(bio->bi_bdev);
1539 if (unlikely(!q)) {
1540 printk(KERN_ERR
1541 "generic_make_request: Trying to access "
1542 "nonexistent block-device %s (%Lu)\n",
1543 bdevname(bio->bi_bdev, b),
1544 (long long) bio->bi_sector);
1545 goto end_io;
1546 }
1547
1548 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1549 nr_sectors > queue_max_hw_sectors(q))) {
1550 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1551 bdevname(bio->bi_bdev, b),
1552 bio_sectors(bio),
1553 queue_max_hw_sectors(q));
1554 goto end_io;
1555 }
1556
1557 part = bio->bi_bdev->bd_part;
1558 if (should_fail_request(part, bio->bi_size) ||
1559 should_fail_request(&part_to_disk(part)->part0,
1560 bio->bi_size))
1561 goto end_io;
1562
1563
1564
1565
1566
1567 blk_partition_remap(bio);
1568
1569 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1570 goto end_io;
1571
1572 if (bio_check_eod(bio, nr_sectors))
1573 goto end_io;
1574
1575
1576
1577
1578
1579
1580 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1581 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1582 if (!nr_sectors) {
1583 err = 0;
1584 goto end_io;
1585 }
1586 }
1587
1588 if ((bio->bi_rw & REQ_DISCARD) &&
1589 (!blk_queue_discard(q) ||
1590 ((bio->bi_rw & REQ_SECURE) &&
1591 !blk_queue_secdiscard(q)))) {
1592 err = -EOPNOTSUPP;
1593 goto end_io;
1594 }
1595
1596 if (blk_throtl_bio(q, bio))
1597 return false;
1598
1599 trace_block_bio_queue(q, bio);
1600 return true;
1601
1602end_io:
1603 bio_endio(bio, err);
1604 return false;
1605}
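
/**
 * generic_make_request - hand a buffer to its device driver for I/O
 * @bio: The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block devices.
 * After sanity checks, it passes the bio to the queue's make_request_fn.
 * Recursive submissions from a make_request_fn are not made directly;
 * they are queued on a per-task bio list and processed iteratively by the
 * outermost caller, which keeps stack usage bounded.
 */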
1631void generic_make_request(struct bio *bio)
1632{
1633 struct bio_list bio_list_on_stack;
1634
1635 if (!generic_make_request_checks(bio))
1636 return;
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648 if (current->bio_list) {
1649 bio_list_add(current->bio_list, bio);
1650 return;
1651 }
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667 BUG_ON(bio->bi_next);
1668 bio_list_init(&bio_list_on_stack);
1669 current->bio_list = &bio_list_on_stack;
1670 do {
1671 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
1672
1673 q->make_request_fn(q, bio);
1674
1675 bio = bio_list_pop(current->bio_list);
1676 } while (bio);
1677 current->bio_list = NULL;
1678}
1679EXPORT_SYMBOL(generic_make_request);
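
/**
 * submit_bio - submit a bio to the block device layer for I/O
 * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
 * @bio: The &struct bio which describes the I/O
 *
 * Adds @rw to @bio->bi_rw, accounts the I/O against the submitting task
 * and the VM counters, and hands the bio to generic_make_request().
 */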
1691void submit_bio(int rw, struct bio *bio)
1692{
1693 int count = bio_sectors(bio);
1694
1695 bio->bi_rw |= rw;
1696
1697
1698
1699
1700
1701 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1702 if (rw & WRITE) {
1703 count_vm_events(PGPGOUT, count);
1704 } else {
1705 task_io_account_read(bio->bi_size);
1706 count_vm_events(PGPGIN, count);
1707 }
1708
1709 if (unlikely(block_dump)) {
1710 char b[BDEVNAME_SIZE];
1711 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1712 current->comm, task_pid_nr(current),
1713 (rw & WRITE) ? "WRITE" : "READ",
1714 (unsigned long long)bio->bi_sector,
1715 bdevname(bio->bi_bdev, b),
1716 count);
1717 }
1718 }
1719
1720 generic_make_request(bio);
1721}
1722EXPORT_SYMBOL(submit_bio);
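
/**
 * blk_rq_check_limits - check a request against a queue's limits
 * @q:  the queue
 * @rq: the request being checked
 *
 * Description:
 *    Check whether @rq's size and segment count fit within the limits of
 *    @q.  Useful when a request is cloned and inserted into a queue other
 *    than the one it was originally prepared for.  Returns 0 if the
 *    request is acceptable, -EIO otherwise.
 */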
1745int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1746{
1747 if (rq->cmd_flags & REQ_DISCARD)
1748 return 0;
1749
1750 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1751 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1752 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1753 return -EIO;
1754 }
1755
1756
1757
1758
1759
1760
1761
1762 blk_recalc_rq_segments(rq);
1763 if (rq->nr_phys_segments > queue_max_segments(q)) {
1764 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1765 return -EIO;
1766 }
1767
1768 return 0;
1769}
1770EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1771
1772
1773
1774
1775
1776
1777int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1778{
1779 unsigned long flags;
1780 int where = ELEVATOR_INSERT_BACK;
1781
1782 if (blk_rq_check_limits(q, rq))
1783 return -EIO;
1784
1785 if (rq->rq_disk &&
1786 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
1787 return -EIO;
1788
1789 spin_lock_irqsave(q->queue_lock, flags);
1790 if (unlikely(blk_queue_dead(q))) {
1791 spin_unlock_irqrestore(q->queue_lock, flags);
1792 return -ENODEV;
1793 }
1794
1795
1796
1797
1798
1799 BUG_ON(blk_queued_rq(rq));
1800
1801 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1802 where = ELEVATOR_INSERT_FLUSH;
1803
1804 add_acct_request(q, rq, where);
1805 if (where == ELEVATOR_INSERT_FLUSH)
1806 __blk_run_queue(q);
1807 spin_unlock_irqrestore(q->queue_lock, flags);
1808
1809 return 0;
1810}
1811EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
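
/**
 * blk_rq_err_bytes - determine the number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request may be a merge of bios with mixed failfast settings.  This
 *     function walks the bios from the start of @rq and returns the number
 *     of bytes which share the failfast policy of the first bio, i.e. the
 *     portion that should be failed together on error.
 */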
1829unsigned int blk_rq_err_bytes(const struct request *rq)
1830{
1831 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1832 unsigned int bytes = 0;
1833 struct bio *bio;
1834
1835 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1836 return blk_rq_bytes(rq);
1837
1838
1839
1840
1841
1842
1843
1844
1845 for (bio = rq->bio; bio; bio = bio->bi_next) {
1846 if ((bio->bi_rw & ff) != ff)
1847 break;
1848 bytes += bio->bi_size;
1849 }
1850
1851
1852 BUG_ON(blk_rq_bytes(rq) && !bytes);
1853 return bytes;
1854}
1855EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1856
1857static void blk_account_io_completion(struct request *req, unsigned int bytes)
1858{
1859 if (blk_do_io_stat(req)) {
1860 const int rw = rq_data_dir(req);
1861 struct hd_struct *part;
1862 int cpu;
1863
1864 cpu = part_stat_lock();
1865 part = req->part;
1866 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1867 part_stat_unlock();
1868 }
1869}
1870
1871static void blk_account_io_done(struct request *req)
1872{
1873
1874
1875
1876
1877
1878 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
1879 unsigned long duration = jiffies - req->start_time;
1880 const int rw = rq_data_dir(req);
1881 struct hd_struct *part;
1882 int cpu;
1883
1884 cpu = part_stat_lock();
1885 part = req->part;
1886
1887 part_stat_inc(cpu, part, ios[rw]);
1888 part_stat_add(cpu, part, ticks[rw], duration);
1889 part_round_stats(cpu, part);
1890 part_dec_in_flight(part, rw);
1891
1892 hd_struct_put(part);
1893 part_stat_unlock();
1894 }
1895}
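
/**
 * blk_peek_request - peek at the top of a request queue
 * @q: request queue to peek at
 *
 * Description:
 *     Return the request at the top of @q without dequeueing it.  The
 *     request is marked started and run through @q->prep_rq_fn (if set);
 *     the caller should hand it to blk_start_request() before processing.
 *
 *     Must be called with the queue lock held.  Returns %NULL if no
 *     request is ready.
 */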
1913struct request *blk_peek_request(struct request_queue *q)
1914{
1915 struct request *rq;
1916 int ret;
1917
1918 while ((rq = __elv_next_request(q)) != NULL) {
1919 if (!(rq->cmd_flags & REQ_STARTED)) {
1920
1921
1922
1923
1924
1925 if (rq->cmd_flags & REQ_SORTED)
1926 elv_activate_rq(q, rq);
1927
1928
1929
1930
1931
1932
1933 rq->cmd_flags |= REQ_STARTED;
1934 trace_block_rq_issue(q, rq);
1935 }
1936
1937 if (!q->boundary_rq || q->boundary_rq == rq) {
1938 q->end_sector = rq_end_sector(rq);
1939 q->boundary_rq = NULL;
1940 }
1941
1942 if (rq->cmd_flags & REQ_DONTPREP)
1943 break;
1944
1945 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1946
1947
1948
1949
1950
1951
1952 rq->nr_phys_segments++;
1953 }
1954
1955 if (!q->prep_rq_fn)
1956 break;
1957
1958 ret = q->prep_rq_fn(q, rq);
1959 if (ret == BLKPREP_OK) {
1960 break;
1961 } else if (ret == BLKPREP_DEFER) {
1962
1963
1964
1965
1966
1967
1968 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1969 !(rq->cmd_flags & REQ_DONTPREP)) {
1970
1971
1972
1973
1974 --rq->nr_phys_segments;
1975 }
1976
1977 rq = NULL;
1978 break;
1979 } else if (ret == BLKPREP_KILL) {
1980 rq->cmd_flags |= REQ_QUIET;
1981
1982
1983
1984
1985 blk_start_request(rq);
1986 __blk_end_request_all(rq, -EIO);
1987 } else {
1988 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1989 break;
1990 }
1991 }
1992
1993 return rq;
1994}
1995EXPORT_SYMBOL(blk_peek_request);
1996
1997void blk_dequeue_request(struct request *rq)
1998{
1999 struct request_queue *q = rq->q;
2000
2001 BUG_ON(list_empty(&rq->queuelist));
2002 BUG_ON(ELV_ON_HASH(rq));
2003
2004 list_del_init(&rq->queuelist);
2005
2006
2007
2008
2009
2010
2011 if (blk_account_rq(rq)) {
2012 q->in_flight[rq_is_sync(rq)]++;
2013 set_io_start_time_ns(rq);
2014 }
2015}
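
/**
 * blk_start_request - start request processing on the driver
 * @req: request to dequeue
 *
 * Description:
 *     Dequeue @req from the queue, initialize its residual byte counts and
 *     arm its timeout timer.  Must be called with the queue lock held.
 */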
2031void blk_start_request(struct request *req)
2032{
2033 blk_dequeue_request(req);
2034
2035
2036
2037
2038
2039 req->resid_len = blk_rq_bytes(req);
2040 if (unlikely(blk_bidi_rq(req)))
2041 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
2042
2043 blk_add_timer(req);
2044}
2045EXPORT_SYMBOL(blk_start_request);
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062struct request *blk_fetch_request(struct request_queue *q)
2063{
2064 struct request *rq;
2065
2066 rq = blk_peek_request(q);
2067 if (rq)
2068 blk_start_request(rq);
2069 return rq;
2070}
2071EXPORT_SYMBOL(blk_fetch_request);
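
/**
 * blk_update_request - complete a number of bytes on a request
 * @req:      the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete for @req
 *
 * Description:
 *     Ends I/O on @nr_bytes worth of bios attached to @req, but does not
 *     complete the request structure itself.  If @req has data left over,
 *     it is set up for the next range of segments.
 *
 * Return:
 *     %false - this request has no more data
 *     %true  - this request has more data
 */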
2095bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2096{
2097 int total_bytes, bio_nbytes, next_idx = 0;
2098 struct bio *bio;
2099
2100 if (!req->bio)
2101 return false;
2102
2103 trace_block_rq_complete(req->q, req);
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113 if (req->cmd_type == REQ_TYPE_FS)
2114 req->errors = 0;
2115
2116 if (error && req->cmd_type == REQ_TYPE_FS &&
2117 !(req->cmd_flags & REQ_QUIET)) {
2118 char *error_type;
2119
2120 switch (error) {
2121 case -ENOLINK:
2122 error_type = "recoverable transport";
2123 break;
2124 case -EREMOTEIO:
2125 error_type = "critical target";
2126 break;
2127 case -EBADE:
2128 error_type = "critical nexus";
2129 break;
2130 case -EIO:
2131 default:
2132 error_type = "I/O";
2133 break;
2134 }
2135 printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
2136 error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
2137 (unsigned long long)blk_rq_pos(req));
2138 }
2139
2140 blk_account_io_completion(req, nr_bytes);
2141
2142 total_bytes = bio_nbytes = 0;
2143 while ((bio = req->bio) != NULL) {
2144 int nbytes;
2145
2146 if (nr_bytes >= bio->bi_size) {
2147 req->bio = bio->bi_next;
2148 nbytes = bio->bi_size;
2149 req_bio_endio(req, bio, nbytes, error);
2150 next_idx = 0;
2151 bio_nbytes = 0;
2152 } else {
2153 int idx = bio->bi_idx + next_idx;
2154
2155 if (unlikely(idx >= bio->bi_vcnt)) {
2156 blk_dump_rq_flags(req, "__end_that");
2157 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2158 __func__, idx, bio->bi_vcnt);
2159 break;
2160 }
2161
2162 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2163 BIO_BUG_ON(nbytes > bio->bi_size);
2164
2165
2166
2167
2168 if (unlikely(nbytes > nr_bytes)) {
2169 bio_nbytes += nr_bytes;
2170 total_bytes += nr_bytes;
2171 break;
2172 }
2173
2174
2175
2176
2177 next_idx++;
2178 bio_nbytes += nbytes;
2179 }
2180
2181 total_bytes += nbytes;
2182 nr_bytes -= nbytes;
2183
2184 bio = req->bio;
2185 if (bio) {
2186
2187
2188
2189 if (unlikely(nr_bytes <= 0))
2190 break;
2191 }
2192 }
2193
2194
2195
2196
2197 if (!req->bio) {
2198
2199
2200
2201
2202
2203 req->__data_len = 0;
2204 return false;
2205 }
2206
2207
2208
2209
2210 if (bio_nbytes) {
2211 req_bio_endio(req, bio, bio_nbytes, error);
2212 bio->bi_idx += next_idx;
2213 bio_iovec(bio)->bv_offset += nr_bytes;
2214 bio_iovec(bio)->bv_len -= nr_bytes;
2215 }
2216
2217 req->__data_len -= total_bytes;
2218 req->buffer = bio_data(req->bio);
2219
2220
2221 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
2222 req->__sector += total_bytes >> 9;
2223
2224
2225 if (req->cmd_flags & REQ_MIXED_MERGE) {
2226 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2227 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2228 }
2229
2230
2231
2232
2233
2234 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2235 blk_dump_rq_flags(req, "request botched");
2236 req->__data_len = blk_rq_cur_bytes(req);
2237 }
2238
2239
2240 blk_recalc_rq_segments(req);
2241
2242 return true;
2243}
2244EXPORT_SYMBOL_GPL(blk_update_request);
2245
2246static bool blk_update_bidi_request(struct request *rq, int error,
2247 unsigned int nr_bytes,
2248 unsigned int bidi_bytes)
2249{
2250 if (blk_update_request(rq, error, nr_bytes))
2251 return true;
2252
2253
2254 if (unlikely(blk_bidi_rq(rq)) &&
2255 blk_update_request(rq->next_rq, error, bidi_bytes))
2256 return true;
2257
2258 if (blk_queue_add_random(rq->q))
2259 add_disk_randomness(rq->rq_disk);
2260
2261 return false;
2262}
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274void blk_unprep_request(struct request *req)
2275{
2276 struct request_queue *q = req->q;
2277
2278 req->cmd_flags &= ~REQ_DONTPREP;
2279 if (q->unprep_rq_fn)
2280 q->unprep_rq_fn(q, req);
2281}
2282EXPORT_SYMBOL_GPL(blk_unprep_request);
2283
2284
2285
2286
2287static void blk_finish_request(struct request *req, int error)
2288{
2289 if (blk_rq_tagged(req))
2290 blk_queue_end_tag(req->q, req);
2291
2292 BUG_ON(blk_queued_rq(req));
2293
2294 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2295 laptop_io_completion(&req->q->backing_dev_info);
2296
2297 blk_delete_timer(req);
2298
2299 if (req->cmd_flags & REQ_DONTPREP)
2300 blk_unprep_request(req);
2301
2302
2303 blk_account_io_done(req);
2304
2305 if (req->end_io)
2306 req->end_io(req, error);
2307 else {
2308 if (blk_bidi_rq(req))
2309 __blk_put_request(req->next_rq->q, req->next_rq);
2310
2311 __blk_put_request(req->q, req);
2312 }
2313}
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332static bool blk_end_bidi_request(struct request *rq, int error,
2333 unsigned int nr_bytes, unsigned int bidi_bytes)
2334{
2335 struct request_queue *q = rq->q;
2336 unsigned long flags;
2337
2338 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2339 return true;
2340
2341 spin_lock_irqsave(q->queue_lock, flags);
2342 blk_finish_request(rq, error);
2343 spin_unlock_irqrestore(q->queue_lock, flags);
2344
2345 return false;
2346}
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363bool __blk_end_bidi_request(struct request *rq, int error,
2364 unsigned int nr_bytes, unsigned int bidi_bytes)
2365{
2366 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2367 return true;
2368
2369 blk_finish_request(rq, error);
2370
2371 return false;
2372}
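
/**
 * blk_end_request - helper for drivers to complete (part of) a request
 * @rq:       the request being processed
 * @error:    %0 for success, < %0 for error
 * @nr_bytes: number of bytes to complete
 *
 * Description:
 *     Ends I/O on @nr_bytes attached to @rq.  If @rq has data left over,
 *     it is set up for the next range of segments.
 *
 * Return:
 *     %false - we are done with this request
 *     %true  - still buffers pending for this request
 */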
2388bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2389{
2390 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2391}
2392EXPORT_SYMBOL(blk_end_request);
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402void blk_end_request_all(struct request *rq, int error)
2403{
2404 bool pending;
2405 unsigned int bidi_bytes = 0;
2406
2407 if (unlikely(blk_bidi_rq(rq)))
2408 bidi_bytes = blk_rq_bytes(rq->next_rq);
2409
2410 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2411 BUG_ON(pending);
2412}
2413EXPORT_SYMBOL(blk_end_request_all);
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427bool blk_end_request_cur(struct request *rq, int error)
2428{
2429 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2430}
2431EXPORT_SYMBOL(blk_end_request_cur);
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445bool blk_end_request_err(struct request *rq, int error)
2446{
2447 WARN_ON(error >= 0);
2448 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2449}
2450EXPORT_SYMBOL_GPL(blk_end_request_err);
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2466{
2467 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2468}
2469EXPORT_SYMBOL(__blk_end_request);
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479void __blk_end_request_all(struct request *rq, int error)
2480{
2481 bool pending;
2482 unsigned int bidi_bytes = 0;
2483
2484 if (unlikely(blk_bidi_rq(rq)))
2485 bidi_bytes = blk_rq_bytes(rq->next_rq);
2486
2487 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2488 BUG_ON(pending);
2489}
2490EXPORT_SYMBOL(__blk_end_request_all);
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505bool __blk_end_request_cur(struct request *rq, int error)
2506{
2507 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2508}
2509EXPORT_SYMBOL(__blk_end_request_cur);
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524bool __blk_end_request_err(struct request *rq, int error)
2525{
2526 WARN_ON(error >= 0);
2527 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2528}
2529EXPORT_SYMBOL_GPL(__blk_end_request_err);
2530
2531void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2532 struct bio *bio)
2533{
2534
2535 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2536
2537 if (bio_has_data(bio)) {
2538 rq->nr_phys_segments = bio_phys_segments(q, bio);
2539 rq->buffer = bio_data(bio);
2540 }
2541 rq->__data_len = bio->bi_size;
2542 rq->bio = rq->biotail = bio;
2543
2544 if (bio->bi_bdev)
2545 rq->rq_disk = bio->bi_bdev->bd_disk;
2546}
2547
2548#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2549
2550
2551
2552
2553
2554
2555
2556void rq_flush_dcache_pages(struct request *rq)
2557{
2558 struct req_iterator iter;
2559 struct bio_vec *bvec;
2560
2561 rq_for_each_segment(bvec, rq, iter)
2562 flush_dcache_page(bvec->bv_page);
2563}
2564EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2565#endif
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586int blk_lld_busy(struct request_queue *q)
2587{
2588 if (q->lld_busy_fn)
2589 return q->lld_busy_fn(q);
2590
2591 return 0;
2592}
2593EXPORT_SYMBOL_GPL(blk_lld_busy);
2594
2595
2596
2597
2598
2599
2600
2601
2602void blk_rq_unprep_clone(struct request *rq)
2603{
2604 struct bio *bio;
2605
2606 while ((bio = rq->bio) != NULL) {
2607 rq->bio = bio->bi_next;
2608
2609 bio_put(bio);
2610 }
2611}
2612EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2613
2614
2615
2616
2617
2618static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2619{
2620 dst->cpu = src->cpu;
2621 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
2622 dst->cmd_type = src->cmd_type;
2623 dst->__sector = blk_rq_pos(src);
2624 dst->__data_len = blk_rq_bytes(src);
2625 dst->nr_phys_segments = src->nr_phys_segments;
2626 dst->ioprio = src->ioprio;
2627 dst->extra_len = src->extra_len;
2628}
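
/**
 * blk_rq_prep_clone - set up a clone of a request
 * @rq: the request to be set up
 * @rq_src: original request to be cloned
 * @bs: bio_set that bios for the clone are allocated from
 * @gfp_mask: memory allocation mask for bio allocation
 * @bio_ctr: optional setup callback invoked for each cloned bio;
 *           returns %0 for success, non-%0 for failure
 * @data: private data passed to @bio_ctr
 *
 * Description:
 *     Clones the bios in @rq_src into @rq and copies the request
 *     attributes (flags, sector, length, segment count, ioprio).  The
 *     cloned bios share the original bios' pages.  Returns %0 on success,
 *     -ENOMEM on failure, in which case any partially-built clone bios
 *     are released.
 */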
2649int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2650 struct bio_set *bs, gfp_t gfp_mask,
2651 int (*bio_ctr)(struct bio *, struct bio *, void *),
2652 void *data)
2653{
2654 struct bio *bio, *bio_src;
2655
2656 if (!bs)
2657 bs = fs_bio_set;
2658
2659 blk_rq_init(NULL, rq);
2660
2661 __rq_for_each_bio(bio_src, rq_src) {
2662 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2663 if (!bio)
2664 goto free_and_out;
2665
2666 __bio_clone(bio, bio_src);
2667
2668 if (bio_integrity(bio_src) &&
2669 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2670 goto free_and_out;
2671
2672 if (bio_ctr && bio_ctr(bio, bio_src, data))
2673 goto free_and_out;
2674
2675 if (rq->bio) {
2676 rq->biotail->bi_next = bio;
2677 rq->biotail = bio;
2678 } else
2679 rq->bio = rq->biotail = bio;
2680 }
2681
2682 __blk_rq_prep_clone(rq, rq_src);
2683
2684 return 0;
2685
2686free_and_out:
2687 if (bio)
2688 bio_free(bio, bs);
2689 blk_rq_unprep_clone(rq);
2690
2691 return -ENOMEM;
2692}
2693EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2694
2695int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2696{
2697 return queue_work(kblockd_workqueue, work);
2698}
2699EXPORT_SYMBOL(kblockd_schedule_work);
2700
2701int kblockd_schedule_delayed_work(struct request_queue *q,
2702 struct delayed_work *dwork, unsigned long delay)
2703{
2704 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2705}
2706EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2707
2708#define PLUG_MAGIC 0x91827364
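
/**
 * blk_start_plug - initialize blk_plug and track it inside the task_struct
 * @plug:	The &struct blk_plug that needs to be initialized
 *
 * Description:
 *   Sets up an on-stack plug list for the current task.  Requests submitted
 *   while the plug is active are held back and dispatched in a batch when
 *   the plug is flushed (blk_finish_plug() or a schedule), which allows
 *   merging and reduces queue lock traffic.  Nested plugs are not tracked;
 *   only the outermost plug is stored in current->plug.
 */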
2724void blk_start_plug(struct blk_plug *plug)
2725{
2726 struct task_struct *tsk = current;
2727
2728 plug->magic = PLUG_MAGIC;
2729 INIT_LIST_HEAD(&plug->list);
2730 INIT_LIST_HEAD(&plug->cb_list);
2731 plug->should_sort = 0;
2732
2733
2734
2735
2736
2737 if (!tsk->plug) {
2738
2739
2740
2741
2742 tsk->plug = plug;
2743 }
2744}
2745EXPORT_SYMBOL(blk_start_plug);
2746
2747static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
2748{
2749 struct request *rqa = container_of(a, struct request, queuelist);
2750 struct request *rqb = container_of(b, struct request, queuelist);
2751
2752 return !(rqa->q <= rqb->q);
2753}
2754
2755
2756
2757
2758
2759
2760
2761static void queue_unplugged(struct request_queue *q, unsigned int depth,
2762 bool from_schedule)
2763 __releases(q->queue_lock)
2764{
2765 trace_block_unplug(q, depth, !from_schedule);
2766
2767
2768
2769
2770 if (unlikely(blk_queue_dead(q))) {
2771 spin_unlock(q->queue_lock);
2772 return;
2773 }
2774
2775
2776
2777
2778
2779
2780 if (from_schedule) {
2781 spin_unlock(q->queue_lock);
2782 blk_run_queue_async(q);
2783 } else {
2784 __blk_run_queue(q);
2785 spin_unlock(q->queue_lock);
2786 }
2787
2788}
2789
2790static void flush_plug_callbacks(struct blk_plug *plug)
2791{
2792 LIST_HEAD(callbacks);
2793
2794 if (list_empty(&plug->cb_list))
2795 return;
2796
2797 list_splice_init(&plug->cb_list, &callbacks);
2798
2799 while (!list_empty(&callbacks)) {
2800 struct blk_plug_cb *cb = list_first_entry(&callbacks,
2801 struct blk_plug_cb,
2802 list);
2803 list_del(&cb->list);
2804 cb->callback(cb);
2805 }
2806}
2807
2808void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2809{
2810 struct request_queue *q;
2811 unsigned long flags;
2812 struct request *rq;
2813 LIST_HEAD(list);
2814 unsigned int depth;
2815
2816 BUG_ON(plug->magic != PLUG_MAGIC);
2817
2818 flush_plug_callbacks(plug);
2819 if (list_empty(&plug->list))
2820 return;
2821
2822 list_splice_init(&plug->list, &list);
2823
2824 if (plug->should_sort) {
2825 list_sort(NULL, &list, plug_rq_cmp);
2826 plug->should_sort = 0;
2827 }
2828
2829 q = NULL;
2830 depth = 0;
2831
2832
2833
2834
2835
2836 local_irq_save(flags);
2837 while (!list_empty(&list)) {
2838 rq = list_entry_rq(list.next);
2839 list_del_init(&rq->queuelist);
2840 BUG_ON(!rq->q);
2841 if (rq->q != q) {
2842
2843
2844
2845 if (q)
2846 queue_unplugged(q, depth, from_schedule);
2847 q = rq->q;
2848 depth = 0;
2849 spin_lock(q->queue_lock);
2850 }
2851
2852
2853
2854
2855 if (unlikely(blk_queue_dead(q))) {
2856 __blk_end_request_all(rq, -ENODEV);
2857 continue;
2858 }
2859
2860
2861
2862
2863 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
2864 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
2865 else
2866 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
2867
2868 depth++;
2869 }
2870
2871
2872
2873
2874 if (q)
2875 queue_unplugged(q, depth, from_schedule);
2876
2877 local_irq_restore(flags);
2878}
2879
2880void blk_finish_plug(struct blk_plug *plug)
2881{
2882 blk_flush_plug_list(plug, false);
2883
2884 if (plug == current->plug)
2885 current->plug = NULL;
2886}
2887EXPORT_SYMBOL(blk_finish_plug);
2888
2889int __init blk_dev_init(void)
2890{
2891 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2892 sizeof(((struct request *)0)->cmd_flags));
2893
2894
2895 kblockd_workqueue = alloc_workqueue("kblockd",
2896 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2897 if (!kblockd_workqueue)
2898 panic("Failed to create kblockd\n");
2899
2900 request_cachep = kmem_cache_create("blkdev_requests",
2901 sizeof(struct request), 0, SLAB_PANIC, NULL);
2902
2903 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2904 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2905
2906 return 0;
2907}
2908