1
2
3
4
5
6
7
8
9
10
11
12
13
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/backing-dev.h>
17#include <linux/bio.h>
18#include <linux/blkdev.h>
19#include <linux/highmem.h>
20#include <linux/mm.h>
21#include <linux/kernel_stat.h>
22#include <linux/string.h>
23#include <linux/init.h>
24#include <linux/completion.h>
25#include <linux/slab.h>
26#include <linux/swap.h>
27#include <linux/writeback.h>
28#include <linux/task_io_accounting_ops.h>
29#include <linux/fault-inject.h>
30#include <linux/list_sort.h>
31#include <linux/delay.h>
32
33#define CREATE_TRACE_POINTS
34#include <trace/events/block.h>
35
36#include "blk.h"
37
/* Block tracepoints exported (GPL-only) for use outside this file. */
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

/*
 * Slab cache backing the per-queue request mempool
 * (handed to mempool_create_node() in blk_init_free_list()).
 */
static struct kmem_cache *request_cachep;

/*
 * Slab cache from which struct request_queue objects are allocated
 * (see blk_alloc_queue_node()).
 */
struct kmem_cache *blk_requestq_cachep;

/*
 * Workqueue on which each queue's delay_work is scheduled
 * (see blk_delay_queue() and blk_run_queue_async()).
 */
static struct workqueue_struct *kblockd_workqueue;
56
57static void drive_stat_acct(struct request *rq, int new_io)
58{
59 struct hd_struct *part;
60 int rw = rq_data_dir(rq);
61 int cpu;
62
63 if (!blk_do_io_stat(rq))
64 return;
65
66 cpu = part_stat_lock();
67
68 if (!new_io) {
69 part = rq->part;
70 part_stat_inc(cpu, part, merges[rw]);
71 } else {
72 part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
73 if (!hd_struct_try_get(part)) {
74
75
76
77
78
79
80
81
82 part = &rq->rq_disk->part0;
83 hd_struct_get(part);
84 }
85 part_round_stats(cpu, part);
86 part_inc_in_flight(part, rw);
87 rq->part = part;
88 }
89
90 part_stat_unlock();
91}
92
93void blk_queue_congestion_threshold(struct request_queue *q)
94{
95 int nr;
96
97 nr = q->nr_requests - (q->nr_requests / 8) + 1;
98 if (nr > q->nr_requests)
99 nr = q->nr_requests;
100 q->nr_congestion_on = nr;
101
102 nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
103 if (nr < 1)
104 nr = 1;
105 q->nr_congestion_off = nr;
106}
107
108
109
110
111
112
113
114
115
116
117struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
118{
119 struct backing_dev_info *ret = NULL;
120 struct request_queue *q = bdev_get_queue(bdev);
121
122 if (q)
123 ret = &q->backing_dev_info;
124 return ret;
125}
126EXPORT_SYMBOL(blk_get_backing_dev_info);
127
128void blk_rq_init(struct request_queue *q, struct request *rq)
129{
130 memset(rq, 0, sizeof(*rq));
131
132 INIT_LIST_HEAD(&rq->queuelist);
133 INIT_LIST_HEAD(&rq->timeout_list);
134 rq->cpu = -1;
135 rq->q = q;
136 rq->__sector = (sector_t) -1;
137 INIT_HLIST_NODE(&rq->hash);
138 RB_CLEAR_NODE(&rq->rb_node);
139 rq->cmd = rq->__cmd;
140 rq->cmd_len = BLK_MAX_CDB;
141 rq->tag = -1;
142 rq->ref_count = 1;
143 rq->start_time = jiffies;
144 set_start_time_ns(rq);
145 rq->part = NULL;
146}
147EXPORT_SYMBOL(blk_rq_init);
148
149static void req_bio_endio(struct request *rq, struct bio *bio,
150 unsigned int nbytes, int error)
151{
152 if (error)
153 clear_bit(BIO_UPTODATE, &bio->bi_flags);
154 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
155 error = -EIO;
156
157 if (unlikely(nbytes > bio->bi_size)) {
158 printk(KERN_ERR "%s: want %u bytes done, %u left\n",
159 __func__, nbytes, bio->bi_size);
160 nbytes = bio->bi_size;
161 }
162
163 if (unlikely(rq->cmd_flags & REQ_QUIET))
164 set_bit(BIO_QUIET, &bio->bi_flags);
165
166 bio->bi_size -= nbytes;
167 bio->bi_sector += (nbytes >> 9);
168
169 if (bio_integrity(bio))
170 bio_integrity_advance(bio, nbytes);
171
172
173 if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
174 bio_endio(bio, error);
175}
176
177void blk_dump_rq_flags(struct request *rq, char *msg)
178{
179 int bit;
180
181 printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg,
182 rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
183 rq->cmd_flags);
184
185 printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n",
186 (unsigned long long)blk_rq_pos(rq),
187 blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
188 printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n",
189 rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
190
191 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
192 printk(KERN_INFO " cdb: ");
193 for (bit = 0; bit < BLK_MAX_CDB; bit++)
194 printk("%02x ", rq->cmd[bit]);
195 printk("\n");
196 }
197}
198EXPORT_SYMBOL(blk_dump_rq_flags);
199
200static void blk_delay_work(struct work_struct *work)
201{
202 struct request_queue *q;
203
204 q = container_of(work, struct request_queue, delay_work.work);
205 spin_lock_irq(q->queue_lock);
206 __blk_run_queue(q);
207 spin_unlock_irq(q->queue_lock);
208}
209
210
211
212
213
214
215
216
217
218
219
220void blk_delay_queue(struct request_queue *q, unsigned long msecs)
221{
222 queue_delayed_work(kblockd_workqueue, &q->delay_work,
223 msecs_to_jiffies(msecs));
224}
225EXPORT_SYMBOL(blk_delay_queue);
226
227
228
229
230
231
232
233
234
235
236void blk_start_queue(struct request_queue *q)
237{
238 WARN_ON(!irqs_disabled());
239
240 queue_flag_clear(QUEUE_FLAG_STOPPED, q);
241 __blk_run_queue(q);
242}
243EXPORT_SYMBOL(blk_start_queue);
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259void blk_stop_queue(struct request_queue *q)
260{
261 __cancel_delayed_work(&q->delay_work);
262 queue_flag_set(QUEUE_FLAG_STOPPED, q);
263}
264EXPORT_SYMBOL(blk_stop_queue);
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284void blk_sync_queue(struct request_queue *q)
285{
286 del_timer_sync(&q->timeout);
287 cancel_delayed_work_sync(&q->delay_work);
288}
289EXPORT_SYMBOL(blk_sync_queue);
290
291
292
293
294
295
296
297
298
299void __blk_run_queue(struct request_queue *q)
300{
301 if (unlikely(blk_queue_stopped(q)))
302 return;
303
304 q->request_fn(q);
305}
306EXPORT_SYMBOL(__blk_run_queue);
307
308
309
310
311
312
313
314
315
316void blk_run_queue_async(struct request_queue *q)
317{
318 if (likely(!blk_queue_stopped(q))) {
319 __cancel_delayed_work(&q->delay_work);
320 queue_delayed_work(kblockd_workqueue, &q->delay_work, 0);
321 }
322}
323EXPORT_SYMBOL(blk_run_queue_async);
324
325
326
327
328
329
330
331
332
333void blk_run_queue(struct request_queue *q)
334{
335 unsigned long flags;
336
337 spin_lock_irqsave(q->queue_lock, flags);
338 __blk_run_queue(q);
339 spin_unlock_irqrestore(q->queue_lock, flags);
340}
341EXPORT_SYMBOL(blk_run_queue);
342
343void blk_put_queue(struct request_queue *q)
344{
345 kobject_put(&q->kobj);
346}
347EXPORT_SYMBOL(blk_put_queue);
348
349
350
351
352
353
354
355
356
357
358void blk_drain_queue(struct request_queue *q, bool drain_all)
359{
360 while (true) {
361 int nr_rqs;
362
363 spin_lock_irq(q->queue_lock);
364
365 elv_drain_elevator(q);
366 if (drain_all)
367 blk_throtl_drain(q);
368
369
370
371
372
373
374
375 if (!list_empty(&q->queue_head))
376 __blk_run_queue(q);
377
378 if (drain_all)
379 nr_rqs = q->rq.count[0] + q->rq.count[1];
380 else
381 nr_rqs = q->rq.elvpriv;
382
383 spin_unlock_irq(q->queue_lock);
384
385 if (!nr_rqs)
386 break;
387 msleep(10);
388 }
389}
390
391
392
393
394
395
396
397
398void blk_cleanup_queue(struct request_queue *q)
399{
400 spinlock_t *lock = q->queue_lock;
401
402
403 mutex_lock(&q->sysfs_lock);
404 queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
405
406 spin_lock_irq(lock);
407 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
408 queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
409 queue_flag_set(QUEUE_FLAG_DEAD, q);
410
411 if (q->queue_lock != &q->__queue_lock)
412 q->queue_lock = &q->__queue_lock;
413
414 spin_unlock_irq(lock);
415 mutex_unlock(&q->sysfs_lock);
416
417
418
419
420
421
422 if (q->elevator)
423 blk_drain_queue(q, true);
424
425
426 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
427 blk_sync_queue(q);
428
429
430 blk_put_queue(q);
431}
432EXPORT_SYMBOL(blk_cleanup_queue);
433
434static int blk_init_free_list(struct request_queue *q)
435{
436 struct request_list *rl = &q->rq;
437
438 if (unlikely(rl->rq_pool))
439 return 0;
440
441 rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
442 rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
443 rl->elvpriv = 0;
444 init_waitqueue_head(&rl->wait[BLK_RW_SYNC]);
445 init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]);
446
447 rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
448 mempool_free_slab, request_cachep, q->node);
449
450 if (!rl->rq_pool)
451 return -ENOMEM;
452
453 return 0;
454}
455
456struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
457{
458 return blk_alloc_queue_node(gfp_mask, -1);
459}
460EXPORT_SYMBOL(blk_alloc_queue);
461
462struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
463{
464 struct request_queue *q;
465 int err;
466
467 q = kmem_cache_alloc_node(blk_requestq_cachep,
468 gfp_mask | __GFP_ZERO, node_id);
469 if (!q)
470 return NULL;
471
472 q->backing_dev_info.ra_pages =
473 (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
474 q->backing_dev_info.state = 0;
475 q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
476 q->backing_dev_info.name = "block";
477 q->node = node_id;
478
479 err = bdi_init(&q->backing_dev_info);
480 if (err) {
481 kmem_cache_free(blk_requestq_cachep, q);
482 return NULL;
483 }
484
485 if (blk_throtl_init(q)) {
486 kmem_cache_free(blk_requestq_cachep, q);
487 return NULL;
488 }
489
490 setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
491 laptop_mode_timer_fn, (unsigned long) q);
492 setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
493 INIT_LIST_HEAD(&q->timeout_list);
494 INIT_LIST_HEAD(&q->flush_queue[0]);
495 INIT_LIST_HEAD(&q->flush_queue[1]);
496 INIT_LIST_HEAD(&q->flush_data_in_flight);
497 INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
498
499 kobject_init(&q->kobj, &blk_queue_ktype);
500
501 mutex_init(&q->sysfs_lock);
502 spin_lock_init(&q->__queue_lock);
503
504
505
506
507
508 q->queue_lock = &q->__queue_lock;
509
510 return q;
511}
512EXPORT_SYMBOL(blk_alloc_queue_node);
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547struct request_queue *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
548{
549 return blk_init_queue_node(rfn, lock, -1);
550}
551EXPORT_SYMBOL(blk_init_queue);
552
553struct request_queue *
554blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
555{
556 struct request_queue *uninit_q, *q;
557
558 uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
559 if (!uninit_q)
560 return NULL;
561
562 q = blk_init_allocated_queue(uninit_q, rfn, lock);
563 if (!q)
564 blk_cleanup_queue(uninit_q);
565
566 return q;
567}
568EXPORT_SYMBOL(blk_init_queue_node);
569
570struct request_queue *
571blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
572 spinlock_t *lock)
573{
574 if (!q)
575 return NULL;
576
577 if (blk_init_free_list(q))
578 return NULL;
579
580 q->request_fn = rfn;
581 q->prep_rq_fn = NULL;
582 q->unprep_rq_fn = NULL;
583 q->queue_flags = QUEUE_FLAG_DEFAULT;
584
585
586 if (lock)
587 q->queue_lock = lock;
588
589
590
591
592 blk_queue_make_request(q, blk_queue_bio);
593
594 q->sg_reserved_size = INT_MAX;
595
596
597
598
599 if (!elevator_init(q, NULL)) {
600 blk_queue_congestion_threshold(q);
601 return q;
602 }
603
604 return NULL;
605}
606EXPORT_SYMBOL(blk_init_allocated_queue);
607
608int blk_get_queue(struct request_queue *q)
609{
610 if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) {
611 kobject_get(&q->kobj);
612 return 0;
613 }
614
615 return 1;
616}
617EXPORT_SYMBOL(blk_get_queue);
618
619static inline void blk_free_request(struct request_queue *q, struct request *rq)
620{
621 if (rq->cmd_flags & REQ_ELVPRIV)
622 elv_put_request(q, rq);
623 mempool_free(rq, q->rq.rq_pool);
624}
625
626static struct request *
627blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask)
628{
629 struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
630
631 if (!rq)
632 return NULL;
633
634 blk_rq_init(q, rq);
635
636 rq->cmd_flags = flags | REQ_ALLOCED;
637
638 if ((flags & REQ_ELVPRIV) &&
639 unlikely(elv_set_request(q, rq, gfp_mask))) {
640 mempool_free(rq, q->rq.rq_pool);
641 return NULL;
642 }
643
644 return rq;
645}
646
647
648
649
650
651static inline int ioc_batching(struct request_queue *q, struct io_context *ioc)
652{
653 if (!ioc)
654 return 0;
655
656
657
658
659
660
661 return ioc->nr_batch_requests == q->nr_batching ||
662 (ioc->nr_batch_requests > 0
663 && time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
664}
665
666
667
668
669
670
671
672static void ioc_set_batching(struct request_queue *q, struct io_context *ioc)
673{
674 if (!ioc || ioc_batching(q, ioc))
675 return;
676
677 ioc->nr_batch_requests = q->nr_batching;
678 ioc->last_waited = jiffies;
679}
680
681static void __freed_request(struct request_queue *q, int sync)
682{
683 struct request_list *rl = &q->rq;
684
685 if (rl->count[sync] < queue_congestion_off_threshold(q))
686 blk_clear_queue_congested(q, sync);
687
688 if (rl->count[sync] + 1 <= q->nr_requests) {
689 if (waitqueue_active(&rl->wait[sync]))
690 wake_up(&rl->wait[sync]);
691
692 blk_clear_queue_full(q, sync);
693 }
694}
695
696
697
698
699
700static void freed_request(struct request_queue *q, unsigned int flags)
701{
702 struct request_list *rl = &q->rq;
703 int sync = rw_is_sync(flags);
704
705 rl->count[sync]--;
706 if (flags & REQ_ELVPRIV)
707 rl->elvpriv--;
708
709 __freed_request(q, sync);
710
711 if (unlikely(rl->starved[sync ^ 1]))
712 __freed_request(q, sync ^ 1);
713}
714
715
716
717
718
719static bool blk_rq_should_init_elevator(struct bio *bio)
720{
721 if (!bio)
722 return true;
723
724
725
726
727
728 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
729 return false;
730
731 return true;
732}
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748static struct request *get_request(struct request_queue *q, int rw_flags,
749 struct bio *bio, gfp_t gfp_mask)
750{
751 struct request *rq = NULL;
752 struct request_list *rl = &q->rq;
753 struct io_context *ioc = NULL;
754 const bool is_sync = rw_is_sync(rw_flags) != 0;
755 int may_queue;
756
757 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
758 return NULL;
759
760 may_queue = elv_may_queue(q, rw_flags);
761 if (may_queue == ELV_MQUEUE_NO)
762 goto rq_starved;
763
764 if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) {
765 if (rl->count[is_sync]+1 >= q->nr_requests) {
766 ioc = current_io_context(GFP_ATOMIC, q->node);
767
768
769
770
771
772
773 if (!blk_queue_full(q, is_sync)) {
774 ioc_set_batching(q, ioc);
775 blk_set_queue_full(q, is_sync);
776 } else {
777 if (may_queue != ELV_MQUEUE_MUST
778 && !ioc_batching(q, ioc)) {
779
780
781
782
783
784 goto out;
785 }
786 }
787 }
788 blk_set_queue_congested(q, is_sync);
789 }
790
791
792
793
794
795
796 if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
797 goto out;
798
799 rl->count[is_sync]++;
800 rl->starved[is_sync] = 0;
801
802 if (blk_rq_should_init_elevator(bio) &&
803 !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) {
804 rw_flags |= REQ_ELVPRIV;
805 rl->elvpriv++;
806 }
807
808 if (blk_queue_io_stat(q))
809 rw_flags |= REQ_IO_STAT;
810 spin_unlock_irq(q->queue_lock);
811
812 rq = blk_alloc_request(q, rw_flags, gfp_mask);
813 if (unlikely(!rq)) {
814
815
816
817
818
819
820
821 spin_lock_irq(q->queue_lock);
822 freed_request(q, rw_flags);
823
824
825
826
827
828
829
830
831rq_starved:
832 if (unlikely(rl->count[is_sync] == 0))
833 rl->starved[is_sync] = 1;
834
835 goto out;
836 }
837
838
839
840
841
842
843
844 if (ioc_batching(q, ioc))
845 ioc->nr_batch_requests--;
846
847 trace_block_getrq(q, bio, rw_flags & 1);
848out:
849 return rq;
850}
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865static struct request *get_request_wait(struct request_queue *q, int rw_flags,
866 struct bio *bio)
867{
868 const bool is_sync = rw_is_sync(rw_flags) != 0;
869 struct request *rq;
870
871 rq = get_request(q, rw_flags, bio, GFP_NOIO);
872 while (!rq) {
873 DEFINE_WAIT(wait);
874 struct io_context *ioc;
875 struct request_list *rl = &q->rq;
876
877 if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
878 return NULL;
879
880 prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
881 TASK_UNINTERRUPTIBLE);
882
883 trace_block_sleeprq(q, bio, rw_flags & 1);
884
885 spin_unlock_irq(q->queue_lock);
886 io_schedule();
887
888
889
890
891
892
893
894 ioc = current_io_context(GFP_NOIO, q->node);
895 ioc_set_batching(q, ioc);
896
897 spin_lock_irq(q->queue_lock);
898 finish_wait(&rl->wait[is_sync], &wait);
899
900 rq = get_request(q, rw_flags, bio, GFP_NOIO);
901 };
902
903 return rq;
904}
905
906struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
907{
908 struct request *rq;
909
910 BUG_ON(rw != READ && rw != WRITE);
911
912 spin_lock_irq(q->queue_lock);
913 if (gfp_mask & __GFP_WAIT)
914 rq = get_request_wait(q, rw, NULL);
915 else
916 rq = get_request(q, rw, NULL, gfp_mask);
917 if (!rq)
918 spin_unlock_irq(q->queue_lock);
919
920
921 return rq;
922}
923EXPORT_SYMBOL(blk_get_request);
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956struct request *blk_make_request(struct request_queue *q, struct bio *bio,
957 gfp_t gfp_mask)
958{
959 struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
960
961 if (unlikely(!rq))
962 return ERR_PTR(-ENOMEM);
963
964 for_each_bio(bio) {
965 struct bio *bounce_bio = bio;
966 int ret;
967
968 blk_queue_bounce(q, &bounce_bio);
969 ret = blk_rq_append_bio(q, rq, bounce_bio);
970 if (unlikely(ret)) {
971 blk_put_request(rq);
972 return ERR_PTR(ret);
973 }
974 }
975
976 return rq;
977}
978EXPORT_SYMBOL(blk_make_request);
979
980
981
982
983
984
985
986
987
988
989
/*
 * Put @rq back on @q: stop its timer, clear its completed state, release
 * its tag if it has one, and hand it to the elevator for requeueing.
 * @rq must not currently be on a queue list (BUG otherwise).
 */
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
	blk_delete_timer(rq);
	blk_clear_rq_complete(rq);
	trace_block_rq_requeue(q, rq);

	if (blk_rq_tagged(rq))
		blk_queue_end_tag(q, rq);

	BUG_ON(blk_queued_rq(rq));

	elv_requeue_request(q, rq);
}
EXPORT_SYMBOL(blk_requeue_request);
1004
/*
 * Account @rq as new I/O and insert it into @q's elevator at position
 * @where (an ELEVATOR_INSERT_* value).
 */
static void add_acct_request(struct request_queue *q, struct request *rq,
			     int where)
{
	const int new_io = 1;

	drive_stat_acct(rq, new_io);
	__elv_add_request(q, rq, where);
}
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031void blk_insert_request(struct request_queue *q, struct request *rq,
1032 int at_head, void *data)
1033{
1034 int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
1035 unsigned long flags;
1036
1037
1038
1039
1040
1041
1042 rq->cmd_type = REQ_TYPE_SPECIAL;
1043
1044 rq->special = data;
1045
1046 spin_lock_irqsave(q->queue_lock, flags);
1047
1048
1049
1050
1051 if (blk_rq_tagged(rq))
1052 blk_queue_end_tag(q, rq);
1053
1054 add_acct_request(q, rq, where);
1055 __blk_run_queue(q);
1056 spin_unlock_irqrestore(q->queue_lock, flags);
1057}
1058EXPORT_SYMBOL(blk_insert_request);
1059
1060static void part_round_stats_single(int cpu, struct hd_struct *part,
1061 unsigned long now)
1062{
1063 if (now == part->stamp)
1064 return;
1065
1066 if (part_in_flight(part)) {
1067 __part_stat_add(cpu, part, time_in_queue,
1068 part_in_flight(part) * (now - part->stamp));
1069 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1070 }
1071 part->stamp = now;
1072}
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090void part_round_stats(int cpu, struct hd_struct *part)
1091{
1092 unsigned long now = jiffies;
1093
1094 if (part->partno)
1095 part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
1096 part_round_stats_single(cpu, part, now);
1097}
1098EXPORT_SYMBOL_GPL(part_round_stats);
1099
1100
1101
1102
1103void __blk_put_request(struct request_queue *q, struct request *req)
1104{
1105 if (unlikely(!q))
1106 return;
1107 if (unlikely(--req->ref_count))
1108 return;
1109
1110 elv_completed_request(q, req);
1111
1112
1113 WARN_ON(req->bio != NULL);
1114
1115
1116
1117
1118
1119 if (req->cmd_flags & REQ_ALLOCED) {
1120 unsigned int flags = req->cmd_flags;
1121
1122 BUG_ON(!list_empty(&req->queuelist));
1123 BUG_ON(!hlist_unhashed(&req->hash));
1124
1125 blk_free_request(q, req);
1126 freed_request(q, flags);
1127 }
1128}
1129EXPORT_SYMBOL_GPL(__blk_put_request);
1130
1131void blk_put_request(struct request *req)
1132{
1133 unsigned long flags;
1134 struct request_queue *q = req->q;
1135
1136 spin_lock_irqsave(q->queue_lock, flags);
1137 __blk_put_request(q, req);
1138 spin_unlock_irqrestore(q->queue_lock, flags);
1139}
1140EXPORT_SYMBOL(blk_put_request);
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155void blk_add_request_payload(struct request *rq, struct page *page,
1156 unsigned int len)
1157{
1158 struct bio *bio = rq->bio;
1159
1160 bio->bi_io_vec->bv_page = page;
1161 bio->bi_io_vec->bv_offset = 0;
1162 bio->bi_io_vec->bv_len = len;
1163
1164 bio->bi_size = len;
1165 bio->bi_vcnt = 1;
1166 bio->bi_phys_segments = 1;
1167
1168 rq->__data_len = rq->resid_len = len;
1169 rq->nr_phys_segments = 1;
1170 rq->buffer = bio_data(bio);
1171}
1172EXPORT_SYMBOL_GPL(blk_add_request_payload);
1173
1174static bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
1175 struct bio *bio)
1176{
1177 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1178
1179 if (!ll_back_merge_fn(q, req, bio))
1180 return false;
1181
1182 trace_block_bio_backmerge(q, bio);
1183
1184 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1185 blk_rq_set_mixed_merge(req);
1186
1187 req->biotail->bi_next = bio;
1188 req->biotail = bio;
1189 req->__data_len += bio->bi_size;
1190 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1191
1192 drive_stat_acct(req, 0);
1193 elv_bio_merged(q, req, bio);
1194 return true;
1195}
1196
1197static bool bio_attempt_front_merge(struct request_queue *q,
1198 struct request *req, struct bio *bio)
1199{
1200 const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1201
1202 if (!ll_front_merge_fn(q, req, bio))
1203 return false;
1204
1205 trace_block_bio_frontmerge(q, bio);
1206
1207 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1208 blk_rq_set_mixed_merge(req);
1209
1210 bio->bi_next = req->bio;
1211 req->bio = bio;
1212
1213
1214
1215
1216
1217
1218 req->buffer = bio_data(bio);
1219 req->__sector = bio->bi_sector;
1220 req->__data_len += bio->bi_size;
1221 req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
1222
1223 drive_stat_acct(req, 0);
1224 elv_bio_merged(q, req, bio);
1225 return true;
1226}
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246static bool attempt_plug_merge(struct request_queue *q, struct bio *bio,
1247 unsigned int *request_count)
1248{
1249 struct blk_plug *plug;
1250 struct request *rq;
1251 bool ret = false;
1252
1253 plug = current->plug;
1254 if (!plug)
1255 goto out;
1256 *request_count = 0;
1257
1258 list_for_each_entry_reverse(rq, &plug->list, queuelist) {
1259 int el_ret;
1260
1261 (*request_count)++;
1262
1263 if (rq->q != q)
1264 continue;
1265
1266 el_ret = elv_try_merge(rq, bio);
1267 if (el_ret == ELEVATOR_BACK_MERGE) {
1268 ret = bio_attempt_back_merge(q, rq, bio);
1269 if (ret)
1270 break;
1271 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1272 ret = bio_attempt_front_merge(q, rq, bio);
1273 if (ret)
1274 break;
1275 }
1276 }
1277out:
1278 return ret;
1279}
1280
1281void init_request_from_bio(struct request *req, struct bio *bio)
1282{
1283 req->cmd_type = REQ_TYPE_FS;
1284
1285 req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
1286 if (bio->bi_rw & REQ_RAHEAD)
1287 req->cmd_flags |= REQ_FAILFAST_MASK;
1288
1289 req->errors = 0;
1290 req->__sector = bio->bi_sector;
1291 req->ioprio = bio_prio(bio);
1292 blk_rq_bio_prep(req->q, req, bio);
1293}
1294
1295void blk_queue_bio(struct request_queue *q, struct bio *bio)
1296{
1297 const bool sync = !!(bio->bi_rw & REQ_SYNC);
1298 struct blk_plug *plug;
1299 int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
1300 struct request *req;
1301 unsigned int request_count = 0;
1302
1303
1304
1305
1306
1307
1308 blk_queue_bounce(q, &bio);
1309
1310 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
1311 spin_lock_irq(q->queue_lock);
1312 where = ELEVATOR_INSERT_FLUSH;
1313 goto get_rq;
1314 }
1315
1316
1317
1318
1319
1320 if (attempt_plug_merge(q, bio, &request_count))
1321 return;
1322
1323 spin_lock_irq(q->queue_lock);
1324
1325 el_ret = elv_merge(q, &req, bio);
1326 if (el_ret == ELEVATOR_BACK_MERGE) {
1327 if (bio_attempt_back_merge(q, req, bio)) {
1328 if (!attempt_back_merge(q, req))
1329 elv_merged_request(q, req, el_ret);
1330 goto out_unlock;
1331 }
1332 } else if (el_ret == ELEVATOR_FRONT_MERGE) {
1333 if (bio_attempt_front_merge(q, req, bio)) {
1334 if (!attempt_front_merge(q, req))
1335 elv_merged_request(q, req, el_ret);
1336 goto out_unlock;
1337 }
1338 }
1339
1340get_rq:
1341
1342
1343
1344
1345
1346 rw_flags = bio_data_dir(bio);
1347 if (sync)
1348 rw_flags |= REQ_SYNC;
1349
1350
1351
1352
1353
1354 req = get_request_wait(q, rw_flags, bio);
1355 if (unlikely(!req)) {
1356 bio_endio(bio, -ENODEV);
1357 goto out_unlock;
1358 }
1359
1360
1361
1362
1363
1364
1365
1366 init_request_from_bio(req, bio);
1367
1368 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
1369 req->cpu = raw_smp_processor_id();
1370
1371 plug = current->plug;
1372 if (plug) {
1373
1374
1375
1376
1377
1378
1379 if (list_empty(&plug->list))
1380 trace_block_plug(q);
1381 else {
1382 if (!plug->should_sort) {
1383 struct request *__rq;
1384
1385 __rq = list_entry_rq(plug->list.prev);
1386 if (__rq->q != q)
1387 plug->should_sort = 1;
1388 }
1389 if (request_count >= BLK_MAX_REQUEST_COUNT) {
1390 blk_flush_plug_list(plug, false);
1391 trace_block_plug(q);
1392 }
1393 }
1394 list_add_tail(&req->queuelist, &plug->list);
1395 drive_stat_acct(req, 1);
1396 } else {
1397 spin_lock_irq(q->queue_lock);
1398 add_acct_request(q, req, where);
1399 __blk_run_queue(q);
1400out_unlock:
1401 spin_unlock_irq(q->queue_lock);
1402 }
1403}
1404EXPORT_SYMBOL_GPL(blk_queue_bio);
1405
1406
1407
1408
1409static inline void blk_partition_remap(struct bio *bio)
1410{
1411 struct block_device *bdev = bio->bi_bdev;
1412
1413 if (bio_sectors(bio) && bdev != bdev->bd_contains) {
1414 struct hd_struct *p = bdev->bd_part;
1415
1416 bio->bi_sector += p->start_sect;
1417 bio->bi_bdev = bdev->bd_contains;
1418
1419 trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
1420 bdev->bd_dev,
1421 bio->bi_sector - p->start_sect);
1422 }
1423}
1424
1425static void handle_bad_sector(struct bio *bio)
1426{
1427 char b[BDEVNAME_SIZE];
1428
1429 printk(KERN_INFO "attempt to access beyond end of device\n");
1430 printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
1431 bdevname(bio->bi_bdev, b),
1432 bio->bi_rw,
1433 (unsigned long long)bio->bi_sector + bio_sectors(bio),
1434 (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
1435
1436 set_bit(BIO_EOF, &bio->bi_flags);
1437}
1438
1439#ifdef CONFIG_FAIL_MAKE_REQUEST
1440
1441static DECLARE_FAULT_ATTR(fail_make_request);
1442
1443static int __init setup_fail_make_request(char *str)
1444{
1445 return setup_fault_attr(&fail_make_request, str);
1446}
1447__setup("fail_make_request=", setup_fail_make_request);
1448
1449static bool should_fail_request(struct hd_struct *part, unsigned int bytes)
1450{
1451 return part->make_it_fail && should_fail(&fail_make_request, bytes);
1452}
1453
1454static int __init fail_make_request_debugfs(void)
1455{
1456 struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
1457 NULL, &fail_make_request);
1458
1459 return IS_ERR(dir) ? PTR_ERR(dir) : 0;
1460}
1461
1462late_initcall(fail_make_request_debugfs);
1463
1464#else
1465
1466static inline bool should_fail_request(struct hd_struct *part,
1467 unsigned int bytes)
1468{
1469 return false;
1470}
1471
1472#endif
1473
1474
1475
1476
1477static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
1478{
1479 sector_t maxsector;
1480
1481 if (!nr_sectors)
1482 return 0;
1483
1484
1485 maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
1486 if (maxsector) {
1487 sector_t sector = bio->bi_sector;
1488
1489 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
1490
1491
1492
1493
1494
1495 handle_bad_sector(bio);
1496 return 1;
1497 }
1498 }
1499
1500 return 0;
1501}
1502
1503static noinline_for_stack bool
1504generic_make_request_checks(struct bio *bio)
1505{
1506 struct request_queue *q;
1507 int nr_sectors = bio_sectors(bio);
1508 int err = -EIO;
1509 char b[BDEVNAME_SIZE];
1510 struct hd_struct *part;
1511
1512 might_sleep();
1513
1514 if (bio_check_eod(bio, nr_sectors))
1515 goto end_io;
1516
1517 q = bdev_get_queue(bio->bi_bdev);
1518 if (unlikely(!q)) {
1519 printk(KERN_ERR
1520 "generic_make_request: Trying to access "
1521 "nonexistent block-device %s (%Lu)\n",
1522 bdevname(bio->bi_bdev, b),
1523 (long long) bio->bi_sector);
1524 goto end_io;
1525 }
1526
1527 if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
1528 nr_sectors > queue_max_hw_sectors(q))) {
1529 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1530 bdevname(bio->bi_bdev, b),
1531 bio_sectors(bio),
1532 queue_max_hw_sectors(q));
1533 goto end_io;
1534 }
1535
1536 part = bio->bi_bdev->bd_part;
1537 if (should_fail_request(part, bio->bi_size) ||
1538 should_fail_request(&part_to_disk(part)->part0,
1539 bio->bi_size))
1540 goto end_io;
1541
1542
1543
1544
1545
1546 blk_partition_remap(bio);
1547
1548 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1549 goto end_io;
1550
1551 if (bio_check_eod(bio, nr_sectors))
1552 goto end_io;
1553
1554
1555
1556
1557
1558
1559 if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
1560 bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
1561 if (!nr_sectors) {
1562 err = 0;
1563 goto end_io;
1564 }
1565 }
1566
1567 if ((bio->bi_rw & REQ_DISCARD) &&
1568 (!blk_queue_discard(q) ||
1569 ((bio->bi_rw & REQ_SECURE) &&
1570 !blk_queue_secdiscard(q)))) {
1571 err = -EOPNOTSUPP;
1572 goto end_io;
1573 }
1574
1575 if (blk_throtl_bio(q, bio))
1576 return false;
1577
1578 trace_block_bio_queue(q, bio);
1579 return true;
1580
1581end_io:
1582 bio_endio(bio, err);
1583 return false;
1584}
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610void generic_make_request(struct bio *bio)
1611{
1612 struct bio_list bio_list_on_stack;
1613
1614 if (!generic_make_request_checks(bio))
1615 return;
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627 if (current->bio_list) {
1628 bio_list_add(current->bio_list, bio);
1629 return;
1630 }
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646 BUG_ON(bio->bi_next);
1647 bio_list_init(&bio_list_on_stack);
1648 current->bio_list = &bio_list_on_stack;
1649 do {
1650 struct request_queue *q = bdev_get_queue(bio->bi_bdev);
1651
1652 q->make_request_fn(q, bio);
1653
1654 bio = bio_list_pop(current->bio_list);
1655 } while (bio);
1656 current->bio_list = NULL;
1657}
1658EXPORT_SYMBOL(generic_make_request);
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670void submit_bio(int rw, struct bio *bio)
1671{
1672 int count = bio_sectors(bio);
1673
1674 bio->bi_rw |= rw;
1675
1676
1677
1678
1679
1680 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
1681 if (rw & WRITE) {
1682 count_vm_events(PGPGOUT, count);
1683 } else {
1684 task_io_account_read(bio->bi_size);
1685 count_vm_events(PGPGIN, count);
1686 }
1687
1688 if (unlikely(block_dump)) {
1689 char b[BDEVNAME_SIZE];
1690 printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
1691 current->comm, task_pid_nr(current),
1692 (rw & WRITE) ? "WRITE" : "READ",
1693 (unsigned long long)bio->bi_sector,
1694 bdevname(bio->bi_bdev, b),
1695 count);
1696 }
1697 }
1698
1699 generic_make_request(bio);
1700}
1701EXPORT_SYMBOL(submit_bio);
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1725{
1726 if (rq->cmd_flags & REQ_DISCARD)
1727 return 0;
1728
1729 if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
1730 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1731 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1732 return -EIO;
1733 }
1734
1735
1736
1737
1738
1739
1740
1741 blk_recalc_rq_segments(rq);
1742 if (rq->nr_phys_segments > queue_max_segments(q)) {
1743 printk(KERN_ERR "%s: over max segments limit.\n", __func__);
1744 return -EIO;
1745 }
1746
1747 return 0;
1748}
1749EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1750
1751
1752
1753
1754
1755
1756int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1757{
1758 unsigned long flags;
1759 int where = ELEVATOR_INSERT_BACK;
1760
1761 if (blk_rq_check_limits(q, rq))
1762 return -EIO;
1763
1764 if (rq->rq_disk &&
1765 should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
1766 return -EIO;
1767
1768 spin_lock_irqsave(q->queue_lock, flags);
1769
1770
1771
1772
1773
1774 BUG_ON(blk_queued_rq(rq));
1775
1776 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1777 where = ELEVATOR_INSERT_FLUSH;
1778
1779 add_acct_request(q, rq, where);
1780 if (where == ELEVATOR_INSERT_FLUSH)
1781 __blk_run_queue(q);
1782 spin_unlock_irqrestore(q->queue_lock, flags);
1783
1784 return 0;
1785}
1786EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804unsigned int blk_rq_err_bytes(const struct request *rq)
1805{
1806 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1807 unsigned int bytes = 0;
1808 struct bio *bio;
1809
1810 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1811 return blk_rq_bytes(rq);
1812
1813
1814
1815
1816
1817
1818
1819
1820 for (bio = rq->bio; bio; bio = bio->bi_next) {
1821 if ((bio->bi_rw & ff) != ff)
1822 break;
1823 bytes += bio->bi_size;
1824 }
1825
1826
1827 BUG_ON(blk_rq_bytes(rq) && !bytes);
1828 return bytes;
1829}
1830EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1831
1832static void blk_account_io_completion(struct request *req, unsigned int bytes)
1833{
1834 if (blk_do_io_stat(req)) {
1835 const int rw = rq_data_dir(req);
1836 struct hd_struct *part;
1837 int cpu;
1838
1839 cpu = part_stat_lock();
1840 part = req->part;
1841 part_stat_add(cpu, part, sectors[rw], bytes >> 9);
1842 part_stat_unlock();
1843 }
1844}
1845
1846static void blk_account_io_done(struct request *req)
1847{
1848
1849
1850
1851
1852
1853 if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
1854 unsigned long duration = jiffies - req->start_time;
1855 const int rw = rq_data_dir(req);
1856 struct hd_struct *part;
1857 int cpu;
1858
1859 cpu = part_stat_lock();
1860 part = req->part;
1861
1862 part_stat_inc(cpu, part, ios[rw]);
1863 part_stat_add(cpu, part, ticks[rw], duration);
1864 part_round_stats(cpu, part);
1865 part_dec_in_flight(part, rw);
1866
1867 hd_struct_put(part);
1868 part_stat_unlock();
1869 }
1870}
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888struct request *blk_peek_request(struct request_queue *q)
1889{
1890 struct request *rq;
1891 int ret;
1892
1893 while ((rq = __elv_next_request(q)) != NULL) {
1894 if (!(rq->cmd_flags & REQ_STARTED)) {
1895
1896
1897
1898
1899
1900 if (rq->cmd_flags & REQ_SORTED)
1901 elv_activate_rq(q, rq);
1902
1903
1904
1905
1906
1907
1908 rq->cmd_flags |= REQ_STARTED;
1909 trace_block_rq_issue(q, rq);
1910 }
1911
1912 if (!q->boundary_rq || q->boundary_rq == rq) {
1913 q->end_sector = rq_end_sector(rq);
1914 q->boundary_rq = NULL;
1915 }
1916
1917 if (rq->cmd_flags & REQ_DONTPREP)
1918 break;
1919
1920 if (q->dma_drain_size && blk_rq_bytes(rq)) {
1921
1922
1923
1924
1925
1926
1927 rq->nr_phys_segments++;
1928 }
1929
1930 if (!q->prep_rq_fn)
1931 break;
1932
1933 ret = q->prep_rq_fn(q, rq);
1934 if (ret == BLKPREP_OK) {
1935 break;
1936 } else if (ret == BLKPREP_DEFER) {
1937
1938
1939
1940
1941
1942
1943 if (q->dma_drain_size && blk_rq_bytes(rq) &&
1944 !(rq->cmd_flags & REQ_DONTPREP)) {
1945
1946
1947
1948
1949 --rq->nr_phys_segments;
1950 }
1951
1952 rq = NULL;
1953 break;
1954 } else if (ret == BLKPREP_KILL) {
1955 rq->cmd_flags |= REQ_QUIET;
1956
1957
1958
1959
1960 blk_start_request(rq);
1961 __blk_end_request_all(rq, -EIO);
1962 } else {
1963 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
1964 break;
1965 }
1966 }
1967
1968 return rq;
1969}
1970EXPORT_SYMBOL(blk_peek_request);
1971
1972void blk_dequeue_request(struct request *rq)
1973{
1974 struct request_queue *q = rq->q;
1975
1976 BUG_ON(list_empty(&rq->queuelist));
1977 BUG_ON(ELV_ON_HASH(rq));
1978
1979 list_del_init(&rq->queuelist);
1980
1981
1982
1983
1984
1985
1986 if (blk_account_rq(rq)) {
1987 q->in_flight[rq_is_sync(rq)]++;
1988 set_io_start_time_ns(rq);
1989 }
1990}
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006void blk_start_request(struct request *req)
2007{
2008 blk_dequeue_request(req);
2009
2010
2011
2012
2013
2014 req->resid_len = blk_rq_bytes(req);
2015 if (unlikely(blk_bidi_rq(req)))
2016 req->next_rq->resid_len = blk_rq_bytes(req->next_rq);
2017
2018 blk_add_timer(req);
2019}
2020EXPORT_SYMBOL(blk_start_request);
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037struct request *blk_fetch_request(struct request_queue *q)
2038{
2039 struct request *rq;
2040
2041 rq = blk_peek_request(q);
2042 if (rq)
2043 blk_start_request(rq);
2044 return rq;
2045}
2046EXPORT_SYMBOL(blk_fetch_request);
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2071{
2072 int total_bytes, bio_nbytes, next_idx = 0;
2073 struct bio *bio;
2074
2075 if (!req->bio)
2076 return false;
2077
2078 trace_block_rq_complete(req->q, req);
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088 if (req->cmd_type == REQ_TYPE_FS)
2089 req->errors = 0;
2090
2091 if (error && req->cmd_type == REQ_TYPE_FS &&
2092 !(req->cmd_flags & REQ_QUIET)) {
2093 char *error_type;
2094
2095 switch (error) {
2096 case -ENOLINK:
2097 error_type = "recoverable transport";
2098 break;
2099 case -EREMOTEIO:
2100 error_type = "critical target";
2101 break;
2102 case -EBADE:
2103 error_type = "critical nexus";
2104 break;
2105 case -EIO:
2106 default:
2107 error_type = "I/O";
2108 break;
2109 }
2110 printk(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
2111 error_type, req->rq_disk ? req->rq_disk->disk_name : "?",
2112 (unsigned long long)blk_rq_pos(req));
2113 }
2114
2115 blk_account_io_completion(req, nr_bytes);
2116
2117 total_bytes = bio_nbytes = 0;
2118 while ((bio = req->bio) != NULL) {
2119 int nbytes;
2120
2121 if (nr_bytes >= bio->bi_size) {
2122 req->bio = bio->bi_next;
2123 nbytes = bio->bi_size;
2124 req_bio_endio(req, bio, nbytes, error);
2125 next_idx = 0;
2126 bio_nbytes = 0;
2127 } else {
2128 int idx = bio->bi_idx + next_idx;
2129
2130 if (unlikely(idx >= bio->bi_vcnt)) {
2131 blk_dump_rq_flags(req, "__end_that");
2132 printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n",
2133 __func__, idx, bio->bi_vcnt);
2134 break;
2135 }
2136
2137 nbytes = bio_iovec_idx(bio, idx)->bv_len;
2138 BIO_BUG_ON(nbytes > bio->bi_size);
2139
2140
2141
2142
2143 if (unlikely(nbytes > nr_bytes)) {
2144 bio_nbytes += nr_bytes;
2145 total_bytes += nr_bytes;
2146 break;
2147 }
2148
2149
2150
2151
2152 next_idx++;
2153 bio_nbytes += nbytes;
2154 }
2155
2156 total_bytes += nbytes;
2157 nr_bytes -= nbytes;
2158
2159 bio = req->bio;
2160 if (bio) {
2161
2162
2163
2164 if (unlikely(nr_bytes <= 0))
2165 break;
2166 }
2167 }
2168
2169
2170
2171
2172 if (!req->bio) {
2173
2174
2175
2176
2177
2178 req->__data_len = 0;
2179 return false;
2180 }
2181
2182
2183
2184
2185 if (bio_nbytes) {
2186 req_bio_endio(req, bio, bio_nbytes, error);
2187 bio->bi_idx += next_idx;
2188 bio_iovec(bio)->bv_offset += nr_bytes;
2189 bio_iovec(bio)->bv_len -= nr_bytes;
2190 }
2191
2192 req->__data_len -= total_bytes;
2193 req->buffer = bio_data(req->bio);
2194
2195
2196 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
2197 req->__sector += total_bytes >> 9;
2198
2199
2200 if (req->cmd_flags & REQ_MIXED_MERGE) {
2201 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2202 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2203 }
2204
2205
2206
2207
2208
2209 if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) {
2210 blk_dump_rq_flags(req, "request botched");
2211 req->__data_len = blk_rq_cur_bytes(req);
2212 }
2213
2214
2215 blk_recalc_rq_segments(req);
2216
2217 return true;
2218}
2219EXPORT_SYMBOL_GPL(blk_update_request);
2220
2221static bool blk_update_bidi_request(struct request *rq, int error,
2222 unsigned int nr_bytes,
2223 unsigned int bidi_bytes)
2224{
2225 if (blk_update_request(rq, error, nr_bytes))
2226 return true;
2227
2228
2229 if (unlikely(blk_bidi_rq(rq)) &&
2230 blk_update_request(rq->next_rq, error, bidi_bytes))
2231 return true;
2232
2233 if (blk_queue_add_random(rq->q))
2234 add_disk_randomness(rq->rq_disk);
2235
2236 return false;
2237}
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249void blk_unprep_request(struct request *req)
2250{
2251 struct request_queue *q = req->q;
2252
2253 req->cmd_flags &= ~REQ_DONTPREP;
2254 if (q->unprep_rq_fn)
2255 q->unprep_rq_fn(q, req);
2256}
2257EXPORT_SYMBOL_GPL(blk_unprep_request);
2258
2259
2260
2261
2262static void blk_finish_request(struct request *req, int error)
2263{
2264 if (blk_rq_tagged(req))
2265 blk_queue_end_tag(req->q, req);
2266
2267 BUG_ON(blk_queued_rq(req));
2268
2269 if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
2270 laptop_io_completion(&req->q->backing_dev_info);
2271
2272 blk_delete_timer(req);
2273
2274 if (req->cmd_flags & REQ_DONTPREP)
2275 blk_unprep_request(req);
2276
2277
2278 blk_account_io_done(req);
2279
2280 if (req->end_io)
2281 req->end_io(req, error);
2282 else {
2283 if (blk_bidi_rq(req))
2284 __blk_put_request(req->next_rq->q, req->next_rq);
2285
2286 __blk_put_request(req->q, req);
2287 }
2288}
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307static bool blk_end_bidi_request(struct request *rq, int error,
2308 unsigned int nr_bytes, unsigned int bidi_bytes)
2309{
2310 struct request_queue *q = rq->q;
2311 unsigned long flags;
2312
2313 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2314 return true;
2315
2316 spin_lock_irqsave(q->queue_lock, flags);
2317 blk_finish_request(rq, error);
2318 spin_unlock_irqrestore(q->queue_lock, flags);
2319
2320 return false;
2321}
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338bool __blk_end_bidi_request(struct request *rq, int error,
2339 unsigned int nr_bytes, unsigned int bidi_bytes)
2340{
2341 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
2342 return true;
2343
2344 blk_finish_request(rq, error);
2345
2346 return false;
2347}
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2364{
2365 return blk_end_bidi_request(rq, error, nr_bytes, 0);
2366}
2367EXPORT_SYMBOL(blk_end_request);
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377void blk_end_request_all(struct request *rq, int error)
2378{
2379 bool pending;
2380 unsigned int bidi_bytes = 0;
2381
2382 if (unlikely(blk_bidi_rq(rq)))
2383 bidi_bytes = blk_rq_bytes(rq->next_rq);
2384
2385 pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2386 BUG_ON(pending);
2387}
2388EXPORT_SYMBOL(blk_end_request_all);
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402bool blk_end_request_cur(struct request *rq, int error)
2403{
2404 return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2405}
2406EXPORT_SYMBOL(blk_end_request_cur);
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420bool blk_end_request_err(struct request *rq, int error)
2421{
2422 WARN_ON(error >= 0);
2423 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2424}
2425EXPORT_SYMBOL_GPL(blk_end_request_err);
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
2441{
2442 return __blk_end_bidi_request(rq, error, nr_bytes, 0);
2443}
2444EXPORT_SYMBOL(__blk_end_request);
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454void __blk_end_request_all(struct request *rq, int error)
2455{
2456 bool pending;
2457 unsigned int bidi_bytes = 0;
2458
2459 if (unlikely(blk_bidi_rq(rq)))
2460 bidi_bytes = blk_rq_bytes(rq->next_rq);
2461
2462 pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
2463 BUG_ON(pending);
2464}
2465EXPORT_SYMBOL(__blk_end_request_all);
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480bool __blk_end_request_cur(struct request *rq, int error)
2481{
2482 return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
2483}
2484EXPORT_SYMBOL(__blk_end_request_cur);
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499bool __blk_end_request_err(struct request *rq, int error)
2500{
2501 WARN_ON(error >= 0);
2502 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2503}
2504EXPORT_SYMBOL_GPL(__blk_end_request_err);
2505
2506void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2507 struct bio *bio)
2508{
2509
2510 rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
2511
2512 if (bio_has_data(bio)) {
2513 rq->nr_phys_segments = bio_phys_segments(q, bio);
2514 rq->buffer = bio_data(bio);
2515 }
2516 rq->__data_len = bio->bi_size;
2517 rq->bio = rq->biotail = bio;
2518
2519 if (bio->bi_bdev)
2520 rq->rq_disk = bio->bi_bdev->bd_disk;
2521}
2522
2523#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
2524
2525
2526
2527
2528
2529
2530
2531void rq_flush_dcache_pages(struct request *rq)
2532{
2533 struct req_iterator iter;
2534 struct bio_vec *bvec;
2535
2536 rq_for_each_segment(bvec, rq, iter)
2537 flush_dcache_page(bvec->bv_page);
2538}
2539EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
2540#endif
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561int blk_lld_busy(struct request_queue *q)
2562{
2563 if (q->lld_busy_fn)
2564 return q->lld_busy_fn(q);
2565
2566 return 0;
2567}
2568EXPORT_SYMBOL_GPL(blk_lld_busy);
2569
2570
2571
2572
2573
2574
2575
2576
2577void blk_rq_unprep_clone(struct request *rq)
2578{
2579 struct bio *bio;
2580
2581 while ((bio = rq->bio) != NULL) {
2582 rq->bio = bio->bi_next;
2583
2584 bio_put(bio);
2585 }
2586}
2587EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
2588
2589
2590
2591
2592
2593static void __blk_rq_prep_clone(struct request *dst, struct request *src)
2594{
2595 dst->cpu = src->cpu;
2596 dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
2597 dst->cmd_type = src->cmd_type;
2598 dst->__sector = blk_rq_pos(src);
2599 dst->__data_len = blk_rq_bytes(src);
2600 dst->nr_phys_segments = src->nr_phys_segments;
2601 dst->ioprio = src->ioprio;
2602 dst->extra_len = src->extra_len;
2603}
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2625 struct bio_set *bs, gfp_t gfp_mask,
2626 int (*bio_ctr)(struct bio *, struct bio *, void *),
2627 void *data)
2628{
2629 struct bio *bio, *bio_src;
2630
2631 if (!bs)
2632 bs = fs_bio_set;
2633
2634 blk_rq_init(NULL, rq);
2635
2636 __rq_for_each_bio(bio_src, rq_src) {
2637 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
2638 if (!bio)
2639 goto free_and_out;
2640
2641 __bio_clone(bio, bio_src);
2642
2643 if (bio_integrity(bio_src) &&
2644 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2645 goto free_and_out;
2646
2647 if (bio_ctr && bio_ctr(bio, bio_src, data))
2648 goto free_and_out;
2649
2650 if (rq->bio) {
2651 rq->biotail->bi_next = bio;
2652 rq->biotail = bio;
2653 } else
2654 rq->bio = rq->biotail = bio;
2655 }
2656
2657 __blk_rq_prep_clone(rq, rq_src);
2658
2659 return 0;
2660
2661free_and_out:
2662 if (bio)
2663 bio_free(bio, bs);
2664 blk_rq_unprep_clone(rq);
2665
2666 return -ENOMEM;
2667}
2668EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
2669
2670int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2671{
2672 return queue_work(kblockd_workqueue, work);
2673}
2674EXPORT_SYMBOL(kblockd_schedule_work);
2675
2676int kblockd_schedule_delayed_work(struct request_queue *q,
2677 struct delayed_work *dwork, unsigned long delay)
2678{
2679 return queue_delayed_work(kblockd_workqueue, dwork, delay);
2680}
2681EXPORT_SYMBOL(kblockd_schedule_delayed_work);
2682
2683#define PLUG_MAGIC 0x91827364
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699void blk_start_plug(struct blk_plug *plug)
2700{
2701 struct task_struct *tsk = current;
2702
2703 plug->magic = PLUG_MAGIC;
2704 INIT_LIST_HEAD(&plug->list);
2705 INIT_LIST_HEAD(&plug->cb_list);
2706 plug->should_sort = 0;
2707
2708
2709
2710
2711
2712 if (!tsk->plug) {
2713
2714
2715
2716
2717 tsk->plug = plug;
2718 }
2719}
2720EXPORT_SYMBOL(blk_start_plug);
2721
2722static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
2723{
2724 struct request *rqa = container_of(a, struct request, queuelist);
2725 struct request *rqb = container_of(b, struct request, queuelist);
2726
2727 return !(rqa->q <= rqb->q);
2728}
2729
2730
2731
2732
2733
2734
2735
2736static void queue_unplugged(struct request_queue *q, unsigned int depth,
2737 bool from_schedule)
2738 __releases(q->queue_lock)
2739{
2740 trace_block_unplug(q, depth, !from_schedule);
2741
2742
2743
2744
2745
2746
2747 if (from_schedule) {
2748 spin_unlock(q->queue_lock);
2749 blk_run_queue_async(q);
2750 } else {
2751 __blk_run_queue(q);
2752 spin_unlock(q->queue_lock);
2753 }
2754
2755}
2756
2757static void flush_plug_callbacks(struct blk_plug *plug)
2758{
2759 LIST_HEAD(callbacks);
2760
2761 if (list_empty(&plug->cb_list))
2762 return;
2763
2764 list_splice_init(&plug->cb_list, &callbacks);
2765
2766 while (!list_empty(&callbacks)) {
2767 struct blk_plug_cb *cb = list_first_entry(&callbacks,
2768 struct blk_plug_cb,
2769 list);
2770 list_del(&cb->list);
2771 cb->callback(cb);
2772 }
2773}
2774
2775void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
2776{
2777 struct request_queue *q;
2778 unsigned long flags;
2779 struct request *rq;
2780 LIST_HEAD(list);
2781 unsigned int depth;
2782
2783 BUG_ON(plug->magic != PLUG_MAGIC);
2784
2785 flush_plug_callbacks(plug);
2786 if (list_empty(&plug->list))
2787 return;
2788
2789 list_splice_init(&plug->list, &list);
2790
2791 if (plug->should_sort) {
2792 list_sort(NULL, &list, plug_rq_cmp);
2793 plug->should_sort = 0;
2794 }
2795
2796 q = NULL;
2797 depth = 0;
2798
2799
2800
2801
2802
2803 local_irq_save(flags);
2804 while (!list_empty(&list)) {
2805 rq = list_entry_rq(list.next);
2806 list_del_init(&rq->queuelist);
2807 BUG_ON(!rq->q);
2808 if (rq->q != q) {
2809
2810
2811
2812 if (q)
2813 queue_unplugged(q, depth, from_schedule);
2814 q = rq->q;
2815 depth = 0;
2816 spin_lock(q->queue_lock);
2817 }
2818
2819
2820
2821 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
2822 __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
2823 else
2824 __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
2825
2826 depth++;
2827 }
2828
2829
2830
2831
2832 if (q)
2833 queue_unplugged(q, depth, from_schedule);
2834
2835 local_irq_restore(flags);
2836}
2837
2838void blk_finish_plug(struct blk_plug *plug)
2839{
2840 blk_flush_plug_list(plug, false);
2841
2842 if (plug == current->plug)
2843 current->plug = NULL;
2844}
2845EXPORT_SYMBOL(blk_finish_plug);
2846
2847int __init blk_dev_init(void)
2848{
2849 BUILD_BUG_ON(__REQ_NR_BITS > 8 *
2850 sizeof(((struct request *)0)->cmd_flags));
2851
2852
2853 kblockd_workqueue = alloc_workqueue("kblockd",
2854 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
2855 if (!kblockd_workqueue)
2856 panic("Failed to create kblockd\n");
2857
2858 request_cachep = kmem_cache_create("blkdev_requests",
2859 sizeof(struct request), 0, SLAB_PANIC, NULL);
2860
2861 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2862 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2863
2864 return 0;
2865}
2866