1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/backing-dev.h>
17#include <linux/bio.h>
18#include <linux/blkdev.h>
19#include <linux/highmem.h>
20#include <linux/mm.h>
21#include <linux/kernel_stat.h>
22#include <linux/string.h>
23#include <linux/init.h>
24#include <linux/completion.h>
25#include <linux/slab.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/task_io_accounting_ops.h>
29#include <linux/fault-inject.h>
30#include <linux/list_sort.h>
31
32#define CREATE_TRACE_POINTS
33#include <trace/events/block.h>
34
35#include "blk.h"
36
37EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
38EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
39EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
40
/*
 * Forward declaration: installed as the queue's make_request handler by
 * blk_init_allocated_queue_node() and defined further down in this file.
 */
static int __make_request(struct request_queue *q, struct bio *bio);

/*
 * Slab cache handed to the per-queue request mempool
 * (see blk_init_free_list()).
 */
static struct kmem_cache *request_cachep;

/*
 * Slab cache from which struct request_queue objects are allocated
 * (see blk_alloc_queue_node()).
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Workqueue on which each queue's delay_work is scheduled
 * (see blk_delay_queue() and blk_run_queue_async()).
 */
static struct workqueue_struct *kblockd_workqueue;
57
58static void drive_stat_acct(struct request *rq, int new_io)
59{
60 struct hd_struct *part;
61 int rw = rq_data_dir(rq);
62 int cpu;
63
64 if (!blk_do_io_stat(rq))
65 return;
66
67 cpu = part_stat_lock();
68
69 if (!new_io) {
70 part = rq->part;
71 part_stat_inc(cpu, part, merges[rw]);
72 } else {
73 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
74 if (!hd_struct_try_get(part)) {
75
76
77
78
79
80
81
82
83 part = &rq->rq_disk->part0;
84 hd_struct_get(part);
85 }
86 part_round_stats(cpu, part);
87 part_inc_in_flight(part, rw);
88 rq->part = part;
89 }
90
91 part_stat_unlock();
92}
93
94void blk_queue_congestion_threshold(struct request_queue *q)
95{
96 int nr;
97
98 nr = q->nr_requests - (q->nr_requests / 8) + 1;
99 if (nr > q->nr_requests)
100 nr = q->nr_requests;
101 q->nr_congestion_on = nr;
102
103 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
104 if (nr < 1)
105 nr = 1;
106 q->nr_congestion_off = nr;
107}
108
109
110
111
112
113
114
115
116
117
118struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
119{
120 struct backing_dev_info *ret = NULL;
121 struct request_queue *q = bdev_get_queue(bdev);
122
123 if (q)
124 ret = &q->backing_dev_info;
125 return ret;
126}
127EXPORT_SYMBOL(blk_get_backing_dev_info);
128
129void blk_rq_init(struct request_queue *q, struct request *rq)
130{
131 memset(rq, 0, sizeof(*rq));
132
133 INIT_LIST_HEAD(&rq->queuelist);
134 INIT_LIST_HEAD(&rq->timeout_list);
135 rq->cpu = -1;
136 rq->q = q;
137 rq->__sector = (sector_t) -1;
138 INIT_HLIST_NODE(&rq->hash);
139 RB_CLEAR_NODE(&rq->rb_node);
140 rq->cmd = rq->__cmd;
141 rq->cmd_len = BLK_MAX_CDB;
142 rq->tag = -1;
143 rq->ref_count = 1;
144 rq->start_time = jiffies;
145 set_start_time_ns(rq);
146 rq->part = NULL;
147}
148EXPORT_SYMBOL(blk_rq_init);
149
150static void req_bio_endio(struct request *rq, struct bio *bio,
151 unsigned int nbytes, int error)
152{
153 if (error)
154 clear_bit(BIO_UPTODATE, &bio->bi_flags);
155 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
156 error = -EIO;
157
158 if (unlikely(nbytes > bio->bi_size)) {
159 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
160 __func__, nbytes, bio->bi_size);
161 nbytes = bio->bi_size;
162 }
163
164 if (unlikely(rq->cmd_flags & REQ_QUIET))
165 set_bit(BIO_QUIET, &bio->bi_flags);
166
167 bio->bi_size -= nbytes;
168 bio->bi_sector += (nbytes >> 9);
169
170 if (bio_integrity(bio))
171 bio_integrity_advance(bio, nbytes);
172
173
174 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
175 bio_endio(bio, error);
176}
177
178void blk_dump_rq_flags(struct request *rq, char *msg)
179{
180 int bit;
181
182 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
183 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
184 rq->cmd_flags);
185
186 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
187 (unsigned long long)blk_rq_pos(rq),
188 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
189 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
190 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
191
192 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
193 printk(KERN_INFO " cdb: ");
194 for (bit = 0; bit < BLK_MAX_CDB; bit++)
195 printk("%02x ", rq->cmd[bit]);
196 printk("\n");
197 }
198}
199EXPORT_SYMBOL(blk_dump_rq_flags);
200
201static void blk_delay_work(struct work_struct *work)
202{
203 struct request_queue *q;
204
205 q = container_of(work, struct request_queue, delay_work.work);
206 spin_lock_irq(q->queue_lock);
207 __blk_run_queue(q);
208 spin_unlock_irq(q->queue_lock);
209}
210
211
212
213
214
215
216
217
218
219
220
221void blk_delay_queue(struct request_queue *q, unsigned long msecs)
222{
223 queue_delayed_work(kblockd_workqueue, &q->delay_work,
224 msecs_to_jiffies(msecs));
225}
226EXPORT_SYMBOL(blk_delay_queue);
227
228
229
230
231
232
233
234
235
236
237void blk_start_queue(struct request_queue *q)
238{
239 WARN_ON(!irqs_disabled());
240
241 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
242 __blk_run_queue(q);
243}
244EXPORT_SYMBOL(blk_start_queue);
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260void blk_stop_queue(struct request_queue *q)
261{
262 __cancel_delayed_work(&q->delay_work);
263 queue_flag_set(QUEUE_FLAG_STOPPED, q);
264}
265EXPORT_SYMBOL(blk_stop_queue);
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285void blk_sync_queue(struct request_queue *q)
286{
287 del_timer_sync(&q->timeout);
288 cancel_delayed_work_sync(&q->delay_work);
289}
290EXPORT_SYMBOL(blk_sync_queue);
291
292
293
294
295
296
297
298
299
300void __blk_run_queue(struct request_queue *q)
301{
302 if (unlikely(blk_queue_stopped(q)))
303 return;
304
305 q->request_fn(q);
306}
307EXPORT_SYMBOL(__blk_run_queue);
308
309
310
311
312
313
314
315
316
317void blk_run_queue_async(struct request_queue *q)
318{
319 if (likely(!blk_queue_stopped(q))) {
320 __cancel_delayed_work(&q->delay_work);
321 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
322 }
323}
324EXPORT_SYMBOL(blk_run_queue_async);
325
326
327
328
329
330
331
332
333
334void blk_run_queue(struct request_queue *q)
335{
336 unsigned long flags;
337
338 spin_lock_irqsave(q->queue_lock, flags);
339 __blk_run_queue(q);
340 spin_unlock_irqrestore(q->queue_lock, flags);
341}
342EXPORT_SYMBOL(blk_run_queue);
343
344void blk_put_queue(struct request_queue *q)
345{
346 kobject_put(&q->kobj);
347}
348EXPORT_SYMBOL(blk_put_queue);
349
350
351
352
353
354
355void blk_cleanup_queue(struct request_queue *q)
356{
357
358
359
360
361
362
363 blk_sync_queue(q);
364
365 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
366 mutex_lock(&q->sysfs_lock);
367 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
368 mutex_unlock(&q->sysfs_lock);
369
370 if (q->elevator)
371 elevator_exit(q->elevator);
372
373 blk_throtl_exit(q);
374
375 blk_put_queue(q);
376}
377EXPORT_SYMBOL(blk_cleanup_queue);
378
379static int blk_init_free_list(struct request_queue *q)
380{
381 struct request_list *rl = &q->rq;
382
383 if (unlikely(rl->rq_pool))
384 return 0;
385
386 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
387 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
388 rl->elvpriv = 0;
389 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
390 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
391
392 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
393 mempool_free_slab, request_cachep, q->node);
394
395 if (!rl->rq_pool)
396 return -ENOMEM;
397
398 return 0;
399}
400
401struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
402{
403 return blk_alloc_queue_node(gfp_mask, -1);
404}
405EXPORT_SYMBOL(blk_alloc_queue);
406
407struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
408{
409 struct request_queue *q;
410 int err;
411
412 q = kmem_cache_alloc_node(blk_requestq_cachep,
413 gfp_mask | __GFP_ZERO, node_id);
414 if (!q)
415 return NULL;
416
417 q->backing_dev_info.ra_pages =
418 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
419 q->backing_dev_info.state = 0;
420 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
421 q->backing_dev_info.name = "block";
422
423 err = bdi_init(&q->backing_dev_info);
424 if (err) {
425 kmem_cache_free(blk_requestq_cachep, q);
426 return NULL;
427 }
428
429 if (blk_throtl_init(q)) {
430 kmem_cache_free(blk_requestq_cachep, q);
431 return NULL;
432 }
433
434 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
435 laptop_mode_timer_fn, (unsigned long) q);
436 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
437 INIT_LIST_HEAD(&q->timeout_list);
438 INIT_LIST_HEAD(&q->flush_queue[0]);
439 INIT_LIST_HEAD(&q->flush_queue[1]);
440 INIT_LIST_HEAD(&q->flush_data_in_flight);
441 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
442
443 kobject_init(&q->kobj, &blk_queue_ktype);
444
445 mutex_init(&q->sysfs_lock);
446 spin_lock_init(&q->__queue_lock);
447
448
449
450
451
452 q->queue_lock = &q->__queue_lock;
453
454 return q;
455}
456EXPORT_SYMBOL(blk_alloc_queue_node);
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
492{
493 return blk_init_queue_node(rfn, lock, -1);
494}
495EXPORT_SYMBOL(blk_init_queue);
496
497struct request_queue *
498blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
499{
500 struct request_queue *uninit_q, *q;
501
502 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
503 if (!uninit_q)
504 return NULL;
505
506 q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
507 if (!q)
508 blk_cleanup_queue(uninit_q);
509
510 return q;
511}
512EXPORT_SYMBOL(blk_init_queue_node);
513
514struct request_queue *
515blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
516 spinlock_t *lock)
517{
518 return blk_init_allocated_queue_node(q, rfn, lock, -1);
519}
520EXPORT_SYMBOL(blk_init_allocated_queue);
521
522struct request_queue *
523blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
524 spinlock_t *lock, int node_id)
525{
526 if (!q)
527 return NULL;
528
529 q->node = node_id;
530 if (blk_init_free_list(q))
531 return NULL;
532
533 q->request_fn = rfn;
534 q->prep_rq_fn = NULL;
535 q->unprep_rq_fn = NULL;
536 q->queue_flags = QUEUE_FLAG_DEFAULT;
537
538
539 if (lock)
540 q->queue_lock = lock;
541
542
543
544
545 blk_queue_make_request(q, __make_request);
546
547 q->sg_reserved_size = INT_MAX;
548
549
550
551
552 if (!elevator_init(q, NULL)) {
553 blk_queue_congestion_threshold(q);
554 return q;
555 }
556
557 return NULL;
558}
559EXPORT_SYMBOL(blk_init_allocated_queue_node);
560
561int blk_get_queue(struct request_queue *q)
562{
563 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
564 kobject_get(&q->kobj);
565 return 0;
566 }
567
568 return 1;
569}
570EXPORT_SYMBOL(blk_get_queue);
571
572static inline void blk_free_request(struct request_queue *q, struct request *rq)
573{
574 BUG_ON(rq->cmd_flags & REQ_ON_PLUG);
575
576 if (rq->cmd_flags & REQ_ELVPRIV)
577 elv_put_request(q, rq);
578 mempool_free(rq, q->rq.rq_pool);
579}
580
581static struct request *
582blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask)
583{
584 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
585
586 if (!rq)
587 return NULL;
588
589 blk_rq_init(q, rq);
590
591 rq->cmd_flags = flags | REQ_ALLOCED;
592
593 if (priv) {
594 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
595 mempool_free(rq, q->rq.rq_pool);
596 return NULL;
597 }
598 rq->cmd_flags |= REQ_ELVPRIV;
599 }
600
601 return rq;
602}
603
604
605
606
607
608static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
609{
610 if (!ioc)
611 return 0;
612
613
614
615
616
617
618 return ioc->nr_batch_requests == q->nr_batching ||
619 (ioc->nr_batch_requests > 0
620 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
621}
622
623
624
625
626
627
628
629static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
630{
631 if (!ioc || ioc_batching(q, ioc))
632 return;
633
634 ioc->nr_batch_requests = q->nr_batching;
635 ioc->last_waited = jiffies;
636}
637
638static void __freed_request(struct request_queue *q, int sync)
639{
640 struct request_list *rl = &q->rq;
641
642 if (rl->count[sync] < queue_congestion_off_threshold(q))
643 blk_clear_queue_congested(q, sync);
644
645 if (rl->count[sync] + 1 <= q->nr_requests) {
646 if (waitqueue_active(&rl->wait[sync]))
647 wake_up(&rl->wait[sync]);
648
649 blk_clear_queue_full(q, sync);
650 }
651}
652
653
654
655
656
657static void freed_request(struct request_queue *q, int sync, int priv)
658{
659 struct request_list *rl = &q->rq;
660
661 rl->count[sync]--;
662 if (priv)
663 rl->elvpriv--;
664
665 __freed_request(q, sync);
666
667 if (unlikely(rl->starved[sync ^ 1]))
668 __freed_request(q, sync ^ 1);
669}
670
671
672
673
674
675static bool blk_rq_should_init_elevator(struct bio *bio)
676{
677 if (!bio)
678 return true;
679
680
681
682
683
684 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
685 return false;
686
687 return true;
688}
689
690
691
692
693
694
/*
 * Try to obtain a free request for @q without sleeping on the request
 * wait queue.
 *
 * @rw_flags: direction/sync flags for the request; REQ_IO_STAT is added
 *            here when the queue has I/O stats enabled
 * @bio:      bio the request is for, or NULL
 * @gfp_mask: allocation flags passed to blk_alloc_request()
 *
 * Locking, as visible from the callers in this file: entered with
 * q->queue_lock held. On success the lock has been dropped (it is
 * released before the allocation and not retaken). On failure (NULL
 * return) the lock is held.
 */
static struct request *get_request(struct request_queue *q, int rw_flags,
				   struct bio *bio, gfp_t gfp_mask)
{
	struct request *rq = NULL;
	struct request_list *rl = &q->rq;
	struct io_context *ioc = NULL;
	const bool is_sync = rw_is_sync(rw_flags) != 0;
	int may_queue, priv = 0;

	/* let the elevator veto the allocation outright */
	may_queue = elv_may_queue(q, rw_flags);
	if (may_queue == ELV_MQUEUE_NO)
		goto rq_starved;

	if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
		if (rl->count[is_sync]+1 >= q->nr_requests) {
			ioc = current_io_context(GFP_ATOMIC, q->node);
			/*
			 * The first task to find the list at its limit
			 * marks the queue full and becomes a batcher.
			 * Once the queue is already marked full, only
			 * batchers and ELV_MQUEUE_MUST allocations may
			 * continue.
			 */
			if (!blk_queue_full(q, is_sync)) {
				ioc_set_batching(q, ioc);
				blk_set_queue_full(q, is_sync);
			} else {
				if (may_queue != ELV_MQUEUE_MUST
						&& !ioc_batching(q, ioc)) {
					/*
					 * Queue full and the caller is not
					 * privileged: fail, lock still held.
					 */
					goto out;
				}
			}
		}
		blk_set_queue_congested(q, is_sync);
	}

	/*
	 * Hard cap: even privileged callers may not push the count to
	 * 1.5 times nr_requests or beyond.
	 */
	if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
		goto out;

	/* reserve the slot before the lock is dropped */
	rl->count[is_sync]++;
	rl->starved[is_sync] = 0;

	if (blk_rq_should_init_elevator(bio)) {
		/* no elevator private data while QUEUE_FLAG_ELVSWITCH is set */
		priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
		if (priv)
			rl->elvpriv++;
	}

	if (blk_queue_io_stat(q))
		rw_flags |= REQ_IO_STAT;
	spin_unlock_irq(q->queue_lock);

	rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
	if (unlikely(!rq)) {
		/*
		 * Allocation failed: retake the lock and give the reserved
		 * slot back.
		 */
		spin_lock_irq(q->queue_lock);
		freed_request(q, is_sync, priv);

		/*
		 * Also reached directly when the elevator refused the
		 * allocation. With no request of this direction
		 * outstanding, mark the direction starved so that
		 * freed_request() on the other direction processes this
		 * one as well.
		 */
rq_starved:
		if (unlikely(rl->count[is_sync] == 0))
			rl->starved[is_sync] = 1;

		goto out;
	}

	/*
	 * ioc is only non-NULL when it was looked up above; a batcher
	 * consumes one unit of its batch credit per request obtained.
	 */
	if (ioc_batching(q, ioc))
		ioc->nr_batch_requests--;

	trace_block_getrq(q, bio, rw_flags & 1);
out:
	return rq;
}
795
796
797
798
799
800
801
802static struct request *get_request_wait(struct request_queue *q, int rw_flags,
803 struct bio *bio)
804{
805 const bool is_sync = rw_is_sync(rw_flags) != 0;
806 struct request *rq;
807
808 rq = get_request(q, rw_flags, bio, GFP_NOIO);
809 while (!rq) {
810 DEFINE_WAIT(wait);
811 struct io_context *ioc;
812 struct request_list *rl = &q->rq;
813
814 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
815 TASK_UNINTERRUPTIBLE);
816
817 trace_block_sleeprq(q, bio, rw_flags & 1);
818
819 spin_unlock_irq(q->queue_lock);
820 io_schedule();
821
822
823
824
825
826
827
828 ioc = current_io_context(GFP_NOIO, q->node);
829 ioc_set_batching(q, ioc);
830
831 spin_lock_irq(q->queue_lock);
832 finish_wait(&rl->wait[is_sync], &wait);
833
834 rq = get_request(q, rw_flags, bio, GFP_NOIO);
835 };
836
837 return rq;
838}
839
840struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
841{
842 struct request *rq;
843
844 BUG_ON(rw != READ && rw != WRITE);
845
846 spin_lock_irq(q->queue_lock);
847 if (gfp_mask & __GFP_WAIT) {
848 rq = get_request_wait(q, rw, NULL);
849 } else {
850 rq = get_request(q, rw, NULL, gfp_mask);
851 if (!rq)
852 spin_unlock_irq(q->queue_lock);
853 }
854
855
856 return rq;
857}
858EXPORT_SYMBOL(blk_get_request);
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891struct request *blk_make_request(struct request_queue *q, struct bio *bio,
892 gfp_t gfp_mask)
893{
894 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
895
896 if (unlikely(!rq))
897 return ERR_PTR(-ENOMEM);
898
899 for_each_bio(bio) {
900 struct bio *bounce_bio = bio;
901 int ret;
902
903 blk_queue_bounce(q, &bounce_bio);
904 ret = blk_rq_append_bio(q, rq, bounce_bio);
905 if (unlikely(ret)) {
906 blk_put_request(rq);
907 return ERR_PTR(ret);
908 }
909 }
910
911 return rq;
912}
913EXPORT_SYMBOL(blk_make_request);
914
915
916
917
918
919
920
921
922
923
924
/*
 * Hand @rq back to the elevator of @q for requeueing.
 *
 * Its timeout timer is removed, its completed marker cleared and, if the
 * request is tagged, its tag released. The request must not be on a
 * queue list at that point.
 */
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	blk_delete_timer(rq);
	blk_clear_rq_complete(rq);
	trace_block_rq_requeue(q, rq);

	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	BUG_ON(blk_queued_rq(rq));
	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);
939
/*
 * Account @rq as new I/O and then add it to the elevator of @q at
 * insertion point @where.
 */
static void add_acct_request(struct request_queue *q, struct request *rq,
			     int where)
{
	const int is_new_io = 1;

	drive_stat_acct(rq, is_new_io);
	__elv_add_request(q, rq, where);
}
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966void blk_insert_request(struct request_queue *q, struct request *rq,
967 int at_head, void *data)
968{
969 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
970 unsigned long flags;
971
972
973
974
975
976
977 rq->cmd_type = REQ_TYPE_SPECIAL;
978
979 rq->special = data;
980
981 spin_lock_irqsave(q->queue_lock, flags);
982
983
984
985
986 if (blk_rq_tagged(rq))
987 blk_queue_end_tag(q, rq);
988
989 add_acct_request(q, rq, where);
990 __blk_run_queue(q);
991 spin_unlock_irqrestore(q->queue_lock, flags);
992}
993EXPORT_SYMBOL(blk_insert_request);
994
995static void part_round_stats_single(int cpu, struct hd_struct *part,
996 unsigned long now)
997{
998 if (now == part->stamp)
999 return;
1000
1001 if (part_in_flight(part)) {
1002 __part_stat_add(cpu, part, time_in_queue,
1003 part_in_flight(part) * (now - part->stamp));
1004 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1005 }
1006 part->stamp = now;
1007}
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025void part_round_stats(int cpu, struct hd_struct *part)
1026{
1027 unsigned long now = jiffies;
1028
1029 if (part->partno)
1030 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1031 part_round_stats_single(cpu, part, now);
1032}
1033EXPORT_SYMBOL_GPL(part_round_stats);
1034
1035
1036
1037
1038void __blk_put_request(struct request_queue *q, struct request *req)
1039{
1040 if (unlikely(!q))
1041 return;
1042 if (unlikely(--req->ref_count))
1043 return;
1044
1045 elv_completed_request(q, req);
1046
1047
1048 WARN_ON(req->bio != NULL);
1049
1050
1051
1052
1053
1054 if (req->cmd_flags & REQ_ALLOCED) {
1055 int is_sync = rq_is_sync(req) != 0;
1056 int priv = req->cmd_flags & REQ_ELVPRIV;
1057
1058 BUG_ON(!list_empty(&req->queuelist));
1059 BUG_ON(!hlist_unhashed(&req->hash));
1060
1061 blk_free_request(q, req);
1062 freed_request(q, is_sync, priv);
1063 }
1064}
1065EXPORT_SYMBOL_GPL(__blk_put_request);
1066
1067void blk_put_request(struct request *req)
1068{
1069 unsigned long flags;
1070 struct request_queue *q = req->q;
1071
1072 spin_lock_irqsave(q->queue_lock, flags);
1073 __blk_put_request(q, req);
1074 spin_unlock_irqrestore(q->queue_lock, flags);
1075}
1076EXPORT_SYMBOL(blk_put_request);
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091void blk_add_request_payload(struct request *rq, struct page *page,
1092 unsigned int len)
1093{
1094 struct bio *bio = rq->bio;
1095
1096 bio->bi_io_vec->bv_page = page;
1097 bio->bi_io_vec->bv_offset = 0;
1098 bio->bi_io_vec->bv_len = len;
1099
1100 bio->bi_size = len;
1101 bio->bi_vcnt = 1;
1102 bio->bi_phys_segments = 1;
1103
1104 rq->__data_len = rq->resid_len = len;
1105 rq->nr_phys_segments = 1;
1106 rq->buffer = bio_data(bio);
1107}
1108EXPORT_SYMBOL_GPL(blk_add_request_payload);
1109
1110static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1111 struct bio *bio)
1112{
1113 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1114
1115
1116
1117
1118 if (!rq_mergeable(req)) {
1119 blk_dump_rq_flags(req, "back");
1120 return false;
1121 }
1122
1123 if (!ll_back_merge_fn(q, req, bio))
1124 return false;
1125
1126 trace_block_bio_backmerge(q, bio);
1127
1128 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1129 blk_rq_set_mixed_merge(req);
1130
1131 req->biotail->bi_next = bio;
1132 req->biotail = bio;
1133 req->__data_len += bio->bi_size;
1134 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1135
1136 drive_stat_acct(req, 0);
1137 return true;
1138}
1139
1140static bool bio_attempt_front_merge(struct request_queue *q,
1141 struct request *req, struct bio *bio)
1142{
1143 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1144 sector_t sector;
1145
1146
1147
1148
1149 if (!rq_mergeable(req)) {
1150 blk_dump_rq_flags(req, "front");
1151 return false;
1152 }
1153
1154 if (!ll_front_merge_fn(q, req, bio))
1155 return false;
1156
1157 trace_block_bio_frontmerge(q, bio);
1158
1159 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1160 blk_rq_set_mixed_merge(req);
1161
1162 sector = bio->bi_sector;
1163
1164 bio->bi_next = req->bio;
1165 req->bio = bio;
1166
1167
1168
1169
1170
1171
1172 req->buffer = bio_data(bio);
1173 req->__sector = bio->bi_sector;
1174 req->__data_len += bio->bi_size;
1175 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1176
1177 drive_stat_acct(req, 0);
1178 return true;
1179}
1180
1181
1182
1183
1184
1185static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q,
1186 struct bio *bio)
1187{
1188 struct blk_plug *plug;
1189 struct request *rq;
1190 bool ret = false;
1191
1192 plug = tsk->plug;
1193 if (!plug)
1194 goto out;
1195
1196 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1197 int el_ret;
1198
1199 if (rq->q != q)
1200 continue;
1201
1202 el_ret = elv_try_merge(rq, bio);
1203 if (el_ret == ELEVATOR_BACK_MERGE) {
1204 ret = bio_attempt_back_merge(q, rq, bio);
1205 if (ret)
1206 break;
1207 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1208 ret = bio_attempt_front_merge(q, rq, bio);
1209 if (ret)
1210 break;
1211 }
1212 }
1213out:
1214 return ret;
1215}
1216
1217void init_request_from_bio(struct request *req, struct bio *bio)
1218{
1219 req->cpu = bio->bi_comp_cpu;
1220 req->cmd_type = REQ_TYPE_FS;
1221
1222 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1223 if (bio->bi_rw & REQ_RAHEAD)
1224 req->cmd_flags |= REQ_FAILFAST_MASK;
1225
1226 req->errors = 0;
1227 req->__sector = bio->bi_sector;
1228 req->ioprio = bio_prio(bio);
1229 blk_rq_bio_prep(req->q, req, bio);
1230}
1231
/*
 * Default make_request handler for request-based queues (installed by
 * blk_init_allocated_queue_node()).
 *
 * The bio is either merged into an existing request (first on the
 * current task's plug list, then in the elevator) or turned into a new
 * request, which is put on the task's plug list if there is one and
 * otherwise inserted into the queue, which is then run.
 *
 * Always returns 0.
 */
static int __make_request(struct request_queue *q, struct bio *bio)
{
	const bool sync = !!(bio->bi_rw & REQ_SYNC);
	struct blk_plug *plug;
	int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
	struct request *req;

	/*
	 * Bounce the bio if the queue requires it; this may replace @bio,
	 * so it has to happen before anything else looks at it.
	 */
	blk_queue_bounce(q, &bio);

	/* flush/FUA bios skip merging and use the flush insertion point */
	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
		spin_lock_irq(q->queue_lock);
		where = ELEVATOR_INSERT_FLUSH;
		goto get_rq;
	}

	/*
	 * First try the requests on the task's plug list; this is done
	 * before the queue lock is taken.
	 */
	if (attempt_plug_merge(current, q, bio))
		goto out;

	spin_lock_irq(q->queue_lock);

	/*
	 * Then ask the elevator. After a successful bio merge, try to
	 * merge the grown request with its neighbour as well; only if
	 * that does not happen is the elevator told about the changed
	 * request.
	 */
	el_ret = elv_merge(q, &req, bio);
	if (el_ret == ELEVATOR_BACK_MERGE) {
		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
		if (bio_attempt_back_merge(q, req, bio)) {
			if (!attempt_back_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
		BUG_ON(req->cmd_flags & REQ_ON_PLUG);
		if (bio_attempt_front_merge(q, req, bio)) {
			if (!attempt_front_merge(q, req))
				elv_merged_request(q, req, el_ret);
			goto out_unlock;
		}
	}

get_rq:
	/*
	 * No merge possible: build the flags for a new request from the
	 * bio's direction and sync bit. The queue lock is held here.
	 */
	rw_flags = bio_data_dir(bio);
	if (sync)
		rw_flags |= REQ_SYNC;

	/*
	 * Get a free request, sleeping if necessary. This always
	 * succeeds and returns with the queue lock dropped.
	 */
	req = get_request_wait(q, rw_flags, bio);

	/* the lock is not held while the request is being set up */
	init_request_from_bio(req, bio);

	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
	    bio_flagged(bio, BIO_CPU_AFFINE)) {
		req->cpu = blk_cpu_to_group(get_cpu());
		put_cpu();
	}

	plug = current->plug;
	if (plug) {
		/*
		 * Plugged: queue the request on the task's plug list
		 * instead of the device queue. If the last plugged
		 * request belongs to a different queue, mark the list
		 * as needing a sort.
		 */
		if (list_empty(&plug->list))
			trace_block_plug(q);
		else if (!plug->should_sort) {
			struct request *__rq;

			__rq = list_entry_rq(plug->list.prev);
			if (__rq->q != q)
				plug->should_sort = 1;
		}
		req->cmd_flags |= REQ_ON_PLUG;
		list_add_tail(&req->queuelist, &plug->list);
		drive_stat_acct(req, 1);
	} else {
		/* not plugged: retake the lock, insert and run the queue */
		spin_lock_irq(q->queue_lock);
		add_acct_request(q, req, where);
		__blk_run_queue(q);
		/* the elevator merge paths above jump here with the lock held */
out_unlock:
		spin_unlock_irq(q->queue_lock);
	}
out:
	return 0;
}
1341
1342
1343
1344
1345static inline void blk_partition_remap(struct bio *bio)
1346{
1347 struct block_device *bdev = bio->bi_bdev;
1348
1349 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1350 struct hd_struct *p = bdev->bd_part;
1351
1352 bio->bi_sector += p->start_sect;
1353 bio->bi_bdev = bdev->bd_contains;
1354
1355 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1356 bdev->bd_dev,
1357 bio->bi_sector - p->start_sect);
1358 }
1359}
1360
1361static void handle_bad_sector(struct bio *bio)
1362{
1363 char b[BDEVNAME_SIZE];
1364
1365 printk(KERN_INFO "attempt to access beyond end of device\n");
1366 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1367 bdevname(bio->bi_bdev, b),
1368 bio->bi_rw,
1369 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1370 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1371
1372 set_bit(BIO_EOF, &bio->bi_flags);
1373}
1374
#ifdef CONFIG_FAIL_MAKE_REQUEST

/* Fault-injection attributes shared by all make_request failure checks. */
static DECLARE_FAULT_ATTR(fail_make_request);

/* Parse the "fail_make_request=" boot parameter into the fault attributes. */
static int __init setup_fail_make_request(char *str)
{
	return setup_fault_attr(&fail_make_request, str);
}
__setup("fail_make_request=", setup_fail_make_request);

/*
 * Decide whether @bio should be failed artificially.
 *
 * Injection is only considered when make_it_fail is set on the bio's
 * partition or on the whole disk; the final decision is left to
 * should_fail() with the bio size. Returns non-zero to fail the bio.
 */
static int should_fail_request(struct bio *bio)
{
	struct hd_struct *hd = bio->bi_bdev->bd_part;

	if (!part_to_disk(hd)->part0.make_it_fail && !hd->make_it_fail)
		return 0;

	return should_fail(&fail_make_request, bio->bi_size);
}

/* Create the "fail_make_request" fault-injection debugfs entries. */
static int __init fail_make_request_debugfs(void)
{
	return init_fault_attr_dentries(&fail_make_request,
					"fail_make_request");
}

late_initcall(fail_make_request_debugfs);

#else /* CONFIG_FAIL_MAKE_REQUEST */

/* Fault injection compiled out: never fail a bio artificially. */
static inline int should_fail_request(struct bio *bio)
{
	return 0;
}

#endif /* CONFIG_FAIL_MAKE_REQUEST */
1411
1412
1413
1414
1415static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1416{
1417 sector_t maxsector;
1418
1419 if (!nr_sectors)
1420 return 0;
1421
1422
1423 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1424 if (maxsector) {
1425 sector_t sector = bio->bi_sector;
1426
1427 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1428
1429
1430
1431
1432
1433 handle_bad_sector(bio);
1434 return 1;
1435 }
1436 }
1437
1438 return 0;
1439}
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465static inline void __generic_make_request(struct bio *bio)
1466{
1467 struct request_queue *q;
1468 sector_t old_sector;
1469 int ret, nr_sectors = bio_sectors(bio);
1470 dev_t old_dev;
1471 int err = -EIO;
1472
1473 might_sleep();
1474
1475 if (bio_check_eod(bio, nr_sectors))
1476 goto end_io;
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486 old_sector = -1;
1487 old_dev = 0;
1488 do {
1489 char b[BDEVNAME_SIZE];
1490
1491 q = bdev_get_queue(bio->bi_bdev);
1492 if (unlikely(!q)) {
1493 printk(KERN_ERR
1494 "generic_make_request: Trying to access "
1495 "nonexistent block-device %s (%Lu)\n",
1496 bdevname(bio->bi_bdev, b),
1497 (long long) bio->bi_sector);
1498 goto end_io;
1499 }
1500
1501 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1502 nr_sectors > queue_max_hw_sectors(q))) {
1503 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1504 bdevname(bio->bi_bdev, b),
1505 bio_sectors(bio),
1506 queue_max_hw_sectors(q));
1507 goto end_io;
1508 }
1509
1510 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
1511 goto end_io;
1512
1513 if (should_fail_request(bio))
1514 goto end_io;
1515
1516
1517
1518
1519
1520 blk_partition_remap(bio);
1521
1522 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1523 goto end_io;
1524
1525 if (old_sector != -1)
1526 trace_block_bio_remap(q, bio, old_dev, old_sector);
1527
1528 old_sector = bio->bi_sector;
1529 old_dev = bio->bi_bdev->bd_dev;
1530
1531 if (bio_check_eod(bio, nr_sectors))
1532 goto end_io;
1533
1534
1535
1536
1537
1538
1539 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1540 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1541 if (!nr_sectors) {
1542 err = 0;
1543 goto end_io;
1544 }
1545 }
1546
1547 if ((bio->bi_rw & REQ_DISCARD) &&
1548 (!blk_queue_discard(q) ||
1549 ((bio->bi_rw & REQ_SECURE) &&
1550 !blk_queue_secdiscard(q)))) {
1551 err = -EOPNOTSUPP;
1552 goto end_io;
1553 }
1554
1555 blk_throtl_bio(q, &bio);
1556
1557
1558
1559
1560
1561 if (!bio)
1562 break;
1563
1564 trace_block_bio_queue(q, bio);
1565
1566 ret = q->make_request_fn(q, bio);
1567 } while (ret);
1568
1569 return;
1570
1571end_io:
1572 bio_endio(bio, err);
1573}
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586void generic_make_request(struct bio *bio)
1587{
1588 struct bio_list bio_list_on_stack;
1589
1590 if (current->bio_list) {
1591
1592 bio_list_add(current->bio_list, bio);
1593 return;
1594 }
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613 BUG_ON(bio->bi_next);
1614 bio_list_init(&bio_list_on_stack);
1615 current->bio_list = &bio_list_on_stack;
1616 do {
1617 __generic_make_request(bio);
1618 bio = bio_list_pop(current->bio_list);
1619 } while (bio);
1620 current->bio_list = NULL;
1621}
1622EXPORT_SYMBOL(generic_make_request);
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634void submit_bio(int rw, struct bio *bio)
1635{
1636 int count = bio_sectors(bio);
1637
1638 bio->bi_rw |= rw;
1639
1640
1641
1642
1643
1644 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1645 if (rw & WRITE) {
1646 count_vm_events(PGPGOUT, count);
1647 } else {
1648 task_io_account_read(bio->bi_size);
1649 count_vm_events(PGPGIN, count);
1650 }
1651
1652 if (unlikely(block_dump)) {
1653 char b[BDEVNAME_SIZE];
1654 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1655 current->comm, task_pid_nr(current),
1656 (rw & WRITE) ? "WRITE" : "READ",
1657 (unsigned long long)bio->bi_sector,
1658 bdevname(bio->bi_bdev, b),
1659 count);
1660 }
1661 }
1662
1663 generic_make_request(bio);
1664}
1665EXPORT_SYMBOL(submit_bio);
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1689{
1690 if (rq->cmd_flags & REQ_DISCARD)
1691 return 0;
1692
1693 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1694 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1695 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1696 return -EIO;
1697 }
1698
1699
1700
1701
1702
1703
1704
1705 blk_recalc_rq_segments(rq);
1706 if (rq->nr_phys_segments > queue_max_segments(q)) {
1707 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1708 return -EIO;
1709 }
1710
1711 return 0;
1712}
1713EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1714
1715
1716
1717
1718
1719
1720int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1721{
1722 unsigned long flags;
1723
1724 if (blk_rq_check_limits(q, rq))
1725 return -EIO;
1726
1727#ifdef CONFIG_FAIL_MAKE_REQUEST
1728 if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
1729 should_fail(&fail_make_request, blk_rq_bytes(rq)))
1730 return -EIO;
1731#endif
1732
1733 spin_lock_irqsave(q->queue_lock, flags);
1734
1735
1736
1737
1738
1739 BUG_ON(blk_queued_rq(rq));
1740
1741 add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
1742 spin_unlock_irqrestore(q->queue_lock, flags);
1743
1744 return 0;
1745}
1746EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764unsigned int blk_rq_err_bytes(const struct request *rq)
1765{
1766 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1767 unsigned int bytes = 0;
1768 struct bio *bio;
1769
1770 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1771 return blk_rq_bytes(rq);
1772
1773
1774
1775
1776
1777
1778
1779
1780 for (bio = rq->bio; bio; bio = bio->bi_next) {
1781 if ((bio->bi_rw & ff) != ff)
1782 break;
1783 bytes += bio->bi_size;
1784 }
1785
1786
1787 BUG_ON(blk_rq_bytes(rq) && !bytes);
1788 return bytes;
1789}
1790EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1791
1792static void blk_account_io_completion(struct request *req, unsigned int bytes)
1793{
1794 if (blk_do_io_stat(req)) {
1795 const int rw = rq_data_dir(req);
1796 struct hd_struct *part;
1797 int cpu;
1798
1799 cpu = part_stat_lock();
1800 part = req->part;
1801 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1802 part_stat_unlock();
1803 }
1804}
1805
1806static void blk_account_io_done(struct request *req)
1807{
1808
1809
1810
1811
1812
1813 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
1814 unsigned long duration = jiffies - req->start_time;
1815 const int rw = rq_data_dir(req);
1816 struct hd_struct *part;
1817 int cpu;
1818
1819 cpu = part_stat_lock();
1820 part = req->part;
1821
1822 part_stat_inc(cpu, part, ios[rw]);
1823 part_stat_add(cpu, part, ticks[rw], duration);
1824 part_round_stats(cpu, part);
1825 part_dec_in_flight(part, rw);
1826
1827 hd_struct_put(part);
1828 part_stat_unlock();
1829 }
1830}
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
/**
 * blk_peek_request - return the next request to process, preparing it first
 * @q: the queue to take the request from
 *
 * Repeatedly asks __elv_next_request() for a request and runs the queue's
 * prep_rq_fn (if any) on it.  The request is not dequeued here.
 *
 * Return: the request to process, or NULL if the queue yielded none or the
 * prep function asked to defer.
 */
struct request *blk_peek_request(struct request_queue *q)
{
	struct request *rq;
	int ret;

	while ((rq = __elv_next_request(q)) != NULL) {
		if (!(rq->cmd_flags & REQ_STARTED)) {
			/*
			 * First time this request is seen here: notify the
			 * elevator for sorted requests, then mark it started
			 * so this branch runs only once per request.
			 */
			if (rq->cmd_flags & REQ_SORTED)
				elv_activate_rq(q, rq);

			rq->cmd_flags |= REQ_STARTED;
			trace_block_rq_issue(q, rq);
		}

		/*
		 * Refresh end_sector when there is no boundary request or
		 * when this request is the boundary, and clear the boundary.
		 */
		if (!q->boundary_rq || q->boundary_rq == rq) {
			q->end_sector = rq_end_sector(rq);
			q->boundary_rq = NULL;
		}

		/* REQ_DONTPREP: hand the request back without (re)preparing. */
		if (rq->cmd_flags & REQ_DONTPREP)
			break;

		if (q->dma_drain_size && blk_rq_bytes(rq)) {
			/*
			 * Account one extra physical segment for requests
			 * with data on queues that use a DMA drain buffer
			 * (presumably room for the drain itself - confirm
			 * against the driver using dma_drain_size).
			 */
			rq->nr_phys_segments++;
		}

		if (!q->prep_rq_fn)
			break;

		ret = q->prep_rq_fn(q, rq);
		if (ret == BLKPREP_OK) {
			break;
		} else if (ret == BLKPREP_DEFER) {
			/*
			 * The request stays on the queue.  Undo the segment
			 * increment from above, unless the prep function set
			 * REQ_DONTPREP, in which case the increment will not
			 * be repeated on the next peek.
			 */
			if (q->dma_drain_size && blk_rq_bytes(rq) &&
			    !(rq->cmd_flags & REQ_DONTPREP)) {
				--rq->nr_phys_segments;
			}

			rq = NULL;
			break;
		} else if (ret == BLKPREP_KILL) {
			/*
			 * Fail the request quietly with -EIO: it has to be
			 * started (dequeued) before it can be ended.  The
			 * loop then moves on to the next request.
			 */
			rq->cmd_flags |= REQ_QUIET;
			blk_start_request(rq);
			__blk_end_request_all(rq, -EIO);
		} else {
			/* Unknown prep result: log it and return rq as-is. */
			printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
			break;
		}
	}

	return rq;
}
EXPORT_SYMBOL(blk_peek_request);
1931
1932void blk_dequeue_request(struct request *rq)
1933{
1934 struct request_queue *q = rq->q;
1935
1936 BUG_ON(list_empty(&rq->queuelist));
1937 BUG_ON(ELV_ON_HASH(rq));
1938
1939 list_del_init(&rq->queuelist);
1940
1941
1942
1943
1944
1945
1946 if (blk_account_rq(rq)) {
1947 q->in_flight[rq_is_sync(rq)]++;
1948 set_io_start_time_ns(rq);
1949 }
1950}
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966void blk_start_request(struct request *req)
1967{
1968 blk_dequeue_request(req);
1969
1970
1971
1972
1973
1974 req->resid_len = blk_rq_bytes(req);
1975 if (unlikely(blk_bidi_rq(req)))
1976 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
1977
1978 blk_add_timer(req);
1979}
1980EXPORT_SYMBOL(blk_start_request);
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997struct request *blk_fetch_request(struct request_queue *q)
1998{
1999 struct request *rq;
2000
2001 rq = blk_peek_request(q);
2002 if (rq)
2003 blk_start_request(rq);
2004 return rq;
2005}
2006EXPORT_SYMBOL(blk_fetch_request);
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
/**
 * blk_update_request - complete @nr_bytes of a request without finishing it
 * @req:      the request being processed
 * @error:    0 for success, otherwise an error code handed to the bios that
 *            are ended here
 * @nr_bytes: number of bytes to complete, counted from the front of @req
 *
 * Walks the bio chain of @req, ending each bio that @nr_bytes covers
 * completely and partially advancing the first bio that it does not.
 * The request itself is not finished here.
 *
 * Return:
 *   %false - @req has no bios attached (either on entry or because all of
 *            them were completed)
 *   %true  - bios remain attached to @req
 */
bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
{
	int total_bytes, bio_nbytes, next_idx = 0;
	struct bio *bio;

	if (!req->bio)
		return false;

	trace_block_rq_complete(req->q, req);

	/*
	 * Reset req->errors for filesystem requests only; for every other
	 * request type the field is left untouched here.
	 */
	if (req->cmd_type == REQ_TYPE_FS)
		req->errors = 0;

	/* Log failed filesystem requests unless they are marked quiet. */
	if (error && req->cmd_type == REQ_TYPE_FS &&
	    !(req->cmd_flags & REQ_QUIET)) {
		char *error_type;

		switch (error) {
		case -ENOLINK:
			error_type = "recoverable transport";
			break;
		case -EREMOTEIO:
			error_type = "critical target";
			break;
		case -EBADE:
			error_type = "critical nexus";
			break;
		case -EIO:
		default:
			error_type = "I/O";
			break;
		}
		printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
		       error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
		       (unsigned long long)blk_rq_pos(req));
	}

	blk_account_io_completion(req, nr_bytes);

	/*
	 * total_bytes: bytes completed across all bios in this call.
	 * bio_nbytes:  bytes completed in the current, not yet finished bio.
	 * next_idx:    number of whole vecs completed in that bio.
	 */
	total_bytes = bio_nbytes = 0;
	while ((bio = req->bio) != NULL) {
		int nbytes;

		if (nr_bytes >= bio->bi_size) {
			/* The whole bio is covered: unlink it and end it. */
			req->bio = bio->bi_next;
			nbytes = bio->bi_size;
			req_bio_endio(req, bio, nbytes, error);
			next_idx = 0;
			bio_nbytes = 0;
		} else {
			/* Partial bio: consume it one vec at a time. */
			int idx = bio->bi_idx + next_idx;

			if (unlikely(idx >= bio->bi_vcnt)) {
				blk_dump_rq_flags(req, "__end_that");
				printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
				       __func__, idx, bio->bi_vcnt);
				break;
			}

			nbytes = bio_iovec_idx(bio, idx)->bv_len;
			BIO_BUG_ON(nbytes > bio->bi_size);

			/*
			 * The remaining count ends inside this vec.  Record
			 * the partial amount here, because the break skips
			 * the common accounting below, and leave nr_bytes
			 * as the offset into the vec for the fix-up after
			 * the loop.
			 */
			if (unlikely(nbytes > nr_bytes)) {
				bio_nbytes += nr_bytes;
				total_bytes += nr_bytes;
				break;
			}

			/* This vec is fully done; move on to the next one. */
			next_idx++;
			bio_nbytes += nbytes;
		}

		total_bytes += nbytes;
		nr_bytes -= nbytes;

		bio = req->bio;
		if (bio) {
			/*
			 * Stop once the byte count is used up.  nr_bytes is
			 * unsigned, so "<= 0" holds exactly when it is zero.
			 */
			if (unlikely(nr_bytes <= 0))
				break;
		}
	}

	/* Every bio was completed: nothing remains attached. */
	if (!req->bio) {
		/*
		 * Zero the data length so the request reports no remaining
		 * bytes.
		 */
		req->__data_len = 0;
		return false;
	}

	/*
	 * The head bio was only partially completed: report the finished
	 * bytes, skip the whole vecs that were consumed, and trim the
	 * current vec by what is left in nr_bytes.
	 */
	if (bio_nbytes) {
		req_bio_endio(req, bio, bio_nbytes, error);
		bio->bi_idx += next_idx;
		bio_iovec(bio)->bv_offset += nr_bytes;
		bio_iovec(bio)->bv_len -= nr_bytes;
	}

	req->__data_len -= total_bytes;
	req->buffer = bio_data(req->bio);

	/* Advance the start sector for fs requests and discards only. */
	if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
		req->__sector += total_bytes >> 9;

	/* Mixed merge: reload the failfast bits from the new head bio. */
	if (req->cmd_flags & REQ_MIXED_MERGE) {
		req->cmd_flags &= ~REQ_FAILFAST_MASK;
		req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
	}

	/*
	 * Sanity check: the remaining length must not be smaller than the
	 * current segment.  If it is, dump the request and raise the length
	 * to the current segment size.
	 */
	if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
		blk_dump_rq_flags(req, "request botched");
		req->__data_len = blk_rq_cur_bytes(req);
	}

	/* The bio chain changed, so recompute the segment count. */
	blk_recalc_rq_segments(req);

	return true;
}
EXPORT_SYMBOL_GPL(blk_update_request);
2180
2181static bool blk_update_bidi_request(struct request *rq, int error,
2182 unsigned int nr_bytes,
2183 unsigned int bidi_bytes)
2184{
2185 if (blk_update_request(rq, error, nr_bytes))
2186 return true;
2187
2188
2189 if (unlikely(blk_bidi_rq(rq)) &&
2190 blk_update_request(rq->next_rq, error, bidi_bytes))
2191 return true;
2192
2193 if (blk_queue_add_random(rq->q))
2194 add_disk_randomness(rq->rq_disk);
2195
2196 return false;
2197}
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209void blk_unprep_request(struct request *req)
2210{
2211 struct request_queue *q = req->q;
2212
2213 req->cmd_flags &= ~REQ_DONTPREP;
2214 if (q->unprep_rq_fn)
2215 q->unprep_rq_fn(q, req);
2216}
2217EXPORT_SYMBOL_GPL(blk_unprep_request);
2218
2219
2220
2221
2222static void blk_finish_request(struct request *req, int error)
2223{
2224 if (blk_rq_tagged(req))
2225 blk_queue_end_tag(req->q, req);
2226
2227 BUG_ON(blk_queued_rq(req));
2228
2229 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2230 laptop_io_completion(&req->q->backing_dev_info);
2231
2232 blk_delete_timer(req);
2233
2234 if (req->cmd_flags & REQ_DONTPREP)
2235 blk_unprep_request(req);
2236
2237
2238 blk_account_io_done(req);
2239
2240 if (req->end_io)
2241 req->end_io(req, error);
2242 else {
2243 if (blk_bidi_rq(req))
2244 __blk_put_request(req->next_rq->q, req->next_rq);
2245
2246 __blk_put_request(req->q, req);
2247 }
2248}
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267static bool blk_end_bidi_request(struct request *rq, int error,
2268 unsigned int nr_bytes, unsigned int bidi_bytes)
2269{
2270 struct request_queue *q = rq->q;
2271 unsigned long flags;
2272
2273 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2274 return true;
2275
2276 spin_lock_irqsave(q->queue_lock, flags);
2277 blk_finish_request(rq, error);
2278 spin_unlock_irqrestore(q->queue_lock, flags);
2279
2280 return false;
2281}
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298static bool __blk_end_bidi_request(struct request *rq, int error,
2299 unsigned int nr_bytes, unsigned int bidi_bytes)
2300{
2301 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2302 return true;
2303
2304 blk_finish_request(rq, error);
2305
2306 return false;
2307}
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2324{
2325 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2326}
2327EXPORT_SYMBOL(blk_end_request);
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337void blk_end_request_all(struct request *rq, int error)
2338{
2339 bool pending;
2340 unsigned int bidi_bytes = 0;
2341
2342 if (unlikely(blk_bidi_rq(rq)))
2343 bidi_bytes = blk_rq_bytes(rq->next_rq);
2344
2345 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2346 BUG_ON(pending);
2347}
2348EXPORT_SYMBOL(blk_end_request_all);
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362bool blk_end_request_cur(struct request *rq, int error)
2363{
2364 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2365}
2366EXPORT_SYMBOL(blk_end_request_cur);
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380bool blk_end_request_err(struct request *rq, int error)
2381{
2382 WARN_ON(error >= 0);
2383 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2384}
2385EXPORT_SYMBOL_GPL(blk_end_request_err);
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2401{
2402 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2403}
2404EXPORT_SYMBOL(__blk_end_request);
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414void __blk_end_request_all(struct request *rq, int error)
2415{
2416 bool pending;
2417 unsigned int bidi_bytes = 0;
2418
2419 if (unlikely(blk_bidi_rq(rq)))
2420 bidi_bytes = blk_rq_bytes(rq->next_rq);
2421
2422 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2423 BUG_ON(pending);
2424}
2425EXPORT_SYMBOL(__blk_end_request_all);
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440bool __blk_end_request_cur(struct request *rq, int error)
2441{
2442 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2443}
2444EXPORT_SYMBOL(__blk_end_request_cur);
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459bool __blk_end_request_err(struct request *rq, int error)
2460{
2461 WARN_ON(error >= 0);
2462 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2463}
2464EXPORT_SYMBOL_GPL(__blk_end_request_err);
2465
2466void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2467 struct bio *bio)
2468{
2469
2470 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2471
2472 if (bio_has_data(bio)) {
2473 rq->nr_phys_segments = bio_phys_segments(q, bio);
2474 rq->buffer = bio_data(bio);
2475 }
2476 rq->__data_len = bio->bi_size;
2477 rq->bio = rq->biotail = bio;
2478
2479 if (bio->bi_bdev)
2480 rq->rq_disk = bio->bi_bdev->bd_disk;
2481}
2482
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/**
 * rq_flush_dcache_pages - flush the data cache for every page of a request
 * @rq: the request whose pages are flushed
 *
 * Calls flush_dcache_page() on the page of each segment in @rq.  Only built
 * when the architecture implements flush_dcache_page().
 */
void rq_flush_dcache_pages(struct request *rq)
{
	struct req_iterator it;
	struct bio_vec *seg;

	rq_for_each_segment(seg, rq, it) {
		flush_dcache_page(seg->bv_page);
	}
}
EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
#endif
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521int blk_lld_busy(struct request_queue *q)
2522{
2523 if (q->lld_busy_fn)
2524 return q->lld_busy_fn(q);
2525
2526 return 0;
2527}
2528EXPORT_SYMBOL_GPL(blk_lld_busy);
2529
2530
2531
2532
2533
2534
2535
2536
2537void blk_rq_unprep_clone(struct request *rq)
2538{
2539 struct bio *bio;
2540
2541 while ((bio = rq->bio) != NULL) {
2542 rq->bio = bio->bi_next;
2543
2544 bio_put(bio);
2545 }
2546}
2547EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2548
2549
2550
2551
2552
2553static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2554{
2555 dst->cpu = src->cpu;
2556 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
2557 dst->cmd_type = src->cmd_type;
2558 dst->__sector = blk_rq_pos(src);
2559 dst->__data_len = blk_rq_bytes(src);
2560 dst->nr_phys_segments = src->nr_phys_segments;
2561 dst->ioprio = src->ioprio;
2562 dst->extra_len = src->extra_len;
2563}
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2585 struct bio_set *bs, gfp_t gfp_mask,
2586 int (*bio_ctr)(struct bio *, struct bio *, void *),
2587 void *data)
2588{
2589 struct bio *bio, *bio_src;
2590
2591 if (!bs)
2592 bs = fs_bio_set;
2593
2594 blk_rq_init(NULL, rq);
2595
2596 __rq_for_each_bio(bio_src, rq_src) {
2597 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2598 if (!bio)
2599 goto free_and_out;
2600
2601 __bio_clone(bio, bio_src);
2602
2603 if (bio_integrity(bio_src) &&
2604 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2605 goto free_and_out;
2606
2607 if (bio_ctr && bio_ctr(bio, bio_src, data))
2608 goto free_and_out;
2609
2610 if (rq->bio) {
2611 rq->biotail->bi_next = bio;
2612 rq->biotail = bio;
2613 } else
2614 rq->bio = rq->biotail = bio;
2615 }
2616
2617 __blk_rq_prep_clone(rq, rq_src);
2618
2619 return 0;
2620
2621free_and_out:
2622 if (bio)
2623 bio_free(bio, bs);
2624 blk_rq_unprep_clone(rq);
2625
2626 return -ENOMEM;
2627}
2628EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2629
2630int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2631{
2632 return queue_work(kblockd_workqueue, work);
2633}
2634EXPORT_SYMBOL(kblockd_schedule_work);
2635
2636int kblockd_schedule_delayed_work(struct request_queue *q,
2637 struct delayed_work *dwork, unsigned long delay)
2638{
2639 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2640}
2641EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2642
2643#define PLUG_MAGIC 0x91827364
2644
2645void blk_start_plug(struct blk_plug *plug)
2646{
2647 struct task_struct *tsk = current;
2648
2649 plug->magic = PLUG_MAGIC;
2650 INIT_LIST_HEAD(&plug->list);
2651 INIT_LIST_HEAD(&plug->cb_list);
2652 plug->should_sort = 0;
2653
2654
2655
2656
2657
2658 if (!tsk->plug) {
2659
2660
2661
2662
2663 tsk->plug = plug;
2664 }
2665}
2666EXPORT_SYMBOL(blk_start_plug);
2667
2668static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
2669{
2670 struct request *rqa = container_of(a, struct request, queuelist);
2671 struct request *rqb = container_of(b, struct request, queuelist);
2672
2673 return !(rqa->q <= rqb->q);
2674}
2675
2676
2677
2678
2679
2680
2681
2682static void queue_unplugged(struct request_queue *q, unsigned int depth,
2683 bool from_schedule)
2684 __releases(q->queue_lock)
2685{
2686 trace_block_unplug(q, depth, !from_schedule);
2687
2688
2689
2690
2691
2692
2693 if (from_schedule) {
2694 spin_unlock(q->queue_lock);
2695 blk_run_queue_async(q);
2696 } else {
2697 __blk_run_queue(q);
2698 spin_unlock(q->queue_lock);
2699 }
2700
2701}
2702
2703static void flush_plug_callbacks(struct blk_plug *plug)
2704{
2705 LIST_HEAD(callbacks);
2706
2707 if (list_empty(&plug->cb_list))
2708 return;
2709
2710 list_splice_init(&plug->cb_list, &callbacks);
2711
2712 while (!list_empty(&callbacks)) {
2713 struct blk_plug_cb *cb = list_first_entry(&callbacks,
2714 struct blk_plug_cb,
2715 list);
2716 list_del(&cb->list);
2717 cb->callback(cb);
2718 }
2719}
2720
/*
 * Flush everything held on @plug: run its callbacks, then move every
 * plugged request onto its request queue and kick each queue.
 *
 * @from_schedule is passed through to queue_unplugged(), which uses it to
 * choose between running the queue directly and running it asynchronously.
 */
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
	struct request_queue *q;
	unsigned long flags;
	struct request *rq;
	LIST_HEAD(list);
	unsigned int depth;

	/* Catch plugs that were never set up by blk_start_plug(). */
	BUG_ON(plug->magic != PLUG_MAGIC);

	flush_plug_callbacks(plug);
	if (list_empty(&plug->list))
		return;

	/* Work on a private copy; plug->list is left empty. */
	list_splice_init(&plug->list, &list);

	/*
	 * When requested, sort with plug_rq_cmp so that requests for the
	 * same queue are adjacent and each queue lock is taken once per run.
	 */
	if (plug->should_sort) {
		list_sort(NULL, &list, plug_rq_cmp);
		plug->should_sort = 0;
	}

	q = NULL;
	depth = 0;

	/*
	 * Interrupts are disabled once for the whole loop; the per-queue
	 * locks below are then taken with plain spin_lock().
	 */
	local_irq_save(flags);
	while (!list_empty(&list)) {
		rq = list_entry_rq(list.next);
		list_del_init(&rq->queuelist);
		BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
		BUG_ON(!rq->q);
		if (rq->q != q) {
			/*
			 * Switching queues: kick the previous one (this
			 * also drops its lock), then lock the new one.
			 */
			if (q)
				queue_unplugged(q, depth, from_schedule);
			q = rq->q;
			depth = 0;
			spin_lock(q->queue_lock);
		}
		rq->cmd_flags &= ~REQ_ON_PLUG;

		/*
		 * Flush/FUA requests go through the flush insertion path;
		 * everything else is inserted sorted, with a merge attempt.
		 */
		if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
			__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
		else
			__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);

		depth++;
	}

	/* Kick the last queue; this releases the lock still held on it. */
	if (q)
		queue_unplugged(q, depth, from_schedule);

	local_irq_restore(flags);
}
2786
2787void blk_finish_plug(struct blk_plug *plug)
2788{
2789 blk_flush_plug_list(plug, false);
2790
2791 if (plug == current->plug)
2792 current->plug = NULL;
2793}
2794EXPORT_SYMBOL(blk_finish_plug);
2795
2796int __init blk_dev_init(void)
2797{
2798 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2799 sizeof(((struct request *)0)->cmd_flags));
2800
2801
2802 kblockd_workqueue = alloc_workqueue("kblockd",
2803 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2804 if (!kblockd_workqueue)
2805 panic("Failed to create kblockd\n");
2806
2807 request_cachep = kmem_cache_create("blkdev_requests",
2808 sizeof(struct request), 0, SLAB_PANIC, NULL);
2809
2810 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2811 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2812
2813 return 0;
2814}
2815