1
2
3
4
5
6
7
8#include <linux/config.h>
9#include <linux/init.h>
10#include <linux/mm.h>
11#include <linux/locks.h>
12#include <linux/fcntl.h>
13#include <linux/slab.h>
14#include <linux/kmod.h>
15#include <linux/major.h>
16#include <linux/devfs_fs_kernel.h>
17#include <linux/smp_lock.h>
18#include <linux/iobuf.h>
19#include <linux/highmem.h>
20#include <linux/blkdev.h>
21#include <linux/module.h>
22
23#include <asm/uaccess.h>
24
25static unsigned long max_block(kdev_t dev)
26{
27 unsigned int retval = ~0U;
28 int major = MAJOR(dev);
29
30 if (blk_size[major]) {
31 int minor = MINOR(dev);
32 unsigned int blocks = blk_size[major][minor];
33 if (blocks) {
34 unsigned int size = block_size(dev);
35 unsigned int sizebits = blksize_bits(size);
36 blocks += (size-1) >> BLOCK_SIZE_BITS;
37 retval = blocks << (BLOCK_SIZE_BITS - sizebits);
38 if (sizebits > BLOCK_SIZE_BITS)
39 retval = blocks >> (sizebits - BLOCK_SIZE_BITS);
40 }
41 }
42 return retval;
43}
44
45static loff_t blkdev_size(kdev_t dev)
46{
47 unsigned int blocks = ~0U;
48 int major = MAJOR(dev);
49
50 if (blk_size[major]) {
51 int minor = MINOR(dev);
52 blocks = blk_size[major][minor];
53 }
54 return (loff_t) blocks << BLOCK_SIZE_BITS;
55}
56
57
/*
 * Drop cached state for @bdev: call invalidate_bdev(bdev, 1), then truncate
 * every page (from offset 0) in the mapping of the bdev's own inode.
 */
static void kill_bdev(struct block_device *bdev)
{
	invalidate_bdev(bdev, 1);
	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
}
63
64int set_blocksize(kdev_t dev, int size)
65{
66 int oldsize;
67 struct block_device *bdev;
68
69
70 if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
71 return -EINVAL;
72
73
74 if (size < get_hardsect_size(dev))
75 return -EINVAL;
76
77
78 if (!blksize_size[MAJOR(dev)]) {
79 if (size == BLOCK_SIZE)
80 return 0;
81 return -EINVAL;
82 }
83
84 oldsize = blksize_size[MAJOR(dev)][MINOR(dev)];
85 if (oldsize == size)
86 return 0;
87
88 if (!oldsize && size == BLOCK_SIZE) {
89 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
90 return 0;
91 }
92
93
94 bdev = bdget(dev);
95 sync_buffers(dev, 2);
96 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
97 bdev->bd_inode->i_blkbits = blksize_bits(size);
98 kill_bdev(bdev);
99 bdput(bdev);
100 return 0;
101}
102
103int sb_set_blocksize(struct super_block *sb, int size)
104{
105 int bits;
106 if (set_blocksize(sb->s_dev, size) < 0)
107 return 0;
108 sb->s_blocksize = size;
109 for (bits = 9, size >>= 9; size >>= 1; bits++)
110 ;
111 sb->s_blocksize_bits = bits;
112 return sb->s_blocksize;
113}
114
115int sb_min_blocksize(struct super_block *sb, int size)
116{
117 int minsize = get_hardsect_size(sb->s_dev);
118 if (size < minsize)
119 size = minsize;
120 return sb_set_blocksize(sb, size);
121}
122
123static int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh, int create)
124{
125 if (iblock >= max_block(inode->i_rdev))
126 return -EIO;
127
128 bh->b_dev = inode->i_rdev;
129 bh->b_blocknr = iblock;
130 bh->b_state |= 1UL << BH_Mapped;
131 return 0;
132}
133
/* direct_IO address-space op: forwards to generic_direct_IO() with blkdev_get_block. */
static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
{
	return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block);
}
138
/* writepage address-space op: forwards to block_write_full_page() with blkdev_get_block. */
static int blkdev_writepage(struct page * page)
{
	return block_write_full_page(page, blkdev_get_block);
}
143
/* readpage address-space op: forwards to block_read_full_page(); @file is unused. */
static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}
148
/* prepare_write address-space op: forwards to block_prepare_write(); @file is unused. */
static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
	return block_prepare_write(page, from, to, blkdev_get_block);
}
153
/* commit_write address-space op: forwards to block_commit_write(); @file is unused. */
static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
	return block_commit_write(page, from, to);
}
158
159
160
161
162
163
164static loff_t block_llseek(struct file *file, loff_t offset, int origin)
165{
166
167 loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size;
168 loff_t retval;
169
170 switch (origin) {
171 case 2:
172 offset += size;
173 break;
174 case 1:
175 offset += file->f_pos;
176 }
177 retval = -EINVAL;
178 if (offset >= 0 && offset <= size) {
179 if (offset != file->f_pos) {
180 file->f_pos = offset;
181 file->f_reada = 0;
182 file->f_version = ++event;
183 }
184 retval = offset;
185 }
186 return retval;
187}
188
189
190static int __block_fsync(struct inode * inode)
191{
192 int ret, err;
193
194 ret = filemap_fdatasync(inode->i_mapping);
195 err = sync_buffers(inode->i_rdev, 1);
196 if (err && !ret)
197 ret = err;
198 err = filemap_fdatawait(inode->i_mapping);
199 if (err && !ret)
200 ret = err;
201
202 return ret;
203}
204
205
206
207
208
209
/*
 * fsync file operation: runs __block_fsync() on the inode behind @dentry.
 * @filp and @datasync are not used.
 */
static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	struct inode * inode = dentry->d_inode;

	return __block_fsync(inode);
}
216
217
218
219
220
221static struct super_block *bd_read_super(struct super_block *sb, void *data, int silent)
222{
223 static struct super_operations sops = {};
224 struct inode *root = new_inode(sb);
225 if (!root)
226 return NULL;
227 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
228 root->i_uid = root->i_gid = 0;
229 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
230 sb->s_maxbytes = ~0ULL;
231 sb->s_blocksize = 1024;
232 sb->s_blocksize_bits = 10;
233 sb->s_magic = 0x62646576;
234 sb->s_op = &sops;
235 sb->s_root = d_alloc(NULL, &(const struct qstr) { "bdev:", 5, 0 });
236 if (!sb->s_root) {
237 iput(root);
238 return NULL;
239 }
240 sb->s_root->d_sb = sb;
241 sb->s_root->d_parent = sb->s_root;
242 d_instantiate(sb->s_root, root);
243 return sb;
244}
245
/* Filesystem type "bdev", read via bd_read_super() and flagged FS_NOMOUNT. */
static DECLARE_FSTYPE(bd_type, "bdev", bd_read_super, FS_NOMOUNT);

/*
 * Kernel mount of bd_type, set up in bdev_cache_init(); bdget() allocates
 * block device inodes from its superblock.
 */
static struct vfsmount *bd_mnt;
249
250
251
252
253
254
/* Hash table of block_device descriptors, keyed by device number (see hash()). */
#define HASH_BITS 6
#define HASH_SIZE (1UL << HASH_BITS)
#define HASH_MASK (HASH_SIZE-1)
static struct list_head bdev_hashtable[HASH_SIZE];
/* Taken around hash table lookups/insertions and inode<->bdev list updates below. */
static spinlock_t bdev_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/* Slab cache for struct block_device, created in bdev_cache_init(). */
static kmem_cache_t * bdev_cachep;

/* Allocate / free a struct block_device from bdev_cachep. */
#define alloc_bdev() \
	((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
265
266static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
267{
268 struct block_device * bdev = (struct block_device *) foo;
269
270 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
271 SLAB_CTOR_CONSTRUCTOR)
272 {
273 memset(bdev, 0, sizeof(*bdev));
274 sema_init(&bdev->bd_sem, 1);
275 INIT_LIST_HEAD(&bdev->bd_inodes);
276 }
277}
278
279void __init bdev_cache_init(void)
280{
281 int i, err;
282 struct list_head *head = bdev_hashtable;
283
284 i = HASH_SIZE;
285 do {
286 INIT_LIST_HEAD(head);
287 head++;
288 i--;
289 } while (i);
290
291 bdev_cachep = kmem_cache_create("bdev_cache",
292 sizeof(struct block_device),
293 0, SLAB_HWCACHE_ALIGN, init_once,
294 NULL);
295 if (!bdev_cachep)
296 panic("Cannot create bdev_cache SLAB cache");
297 err = register_filesystem(&bd_type);
298 if (err)
299 panic("Cannot register bdev pseudo-fs");
300 bd_mnt = kern_mount(&bd_type);
301 err = PTR_ERR(bd_mnt);
302 if (IS_ERR(bd_mnt))
303 panic("Cannot create bdev pseudo-fs");
304}
305
306
307
308
309
310static inline unsigned long hash(dev_t dev)
311{
312 unsigned long tmp = dev;
313 tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
314 return tmp & HASH_MASK;
315}
316
317static struct block_device *bdfind(dev_t dev, struct list_head *head)
318{
319 struct list_head *p;
320 struct block_device *bdev;
321 for (p=head->next; p!=head; p=p->next) {
322 bdev = list_entry(p, struct block_device, bd_hash);
323 if (bdev->bd_dev != dev)
324 continue;
325 atomic_inc(&bdev->bd_count);
326 return bdev;
327 }
328 return NULL;
329}
330
331struct block_device *bdget(dev_t dev)
332{
333 struct list_head * head = bdev_hashtable + hash(dev);
334 struct block_device *bdev, *new_bdev;
335 spin_lock(&bdev_lock);
336 bdev = bdfind(dev, head);
337 spin_unlock(&bdev_lock);
338 if (bdev)
339 return bdev;
340 new_bdev = alloc_bdev();
341 if (new_bdev) {
342 struct inode *inode = new_inode(bd_mnt->mnt_sb);
343 if (inode) {
344 kdev_t kdev = to_kdev_t(dev);
345 atomic_set(&new_bdev->bd_count,1);
346 new_bdev->bd_dev = dev;
347 new_bdev->bd_op = NULL;
348 new_bdev->bd_inode = inode;
349 inode->i_rdev = kdev;
350 inode->i_dev = kdev;
351 inode->i_bdev = new_bdev;
352 inode->i_data.a_ops = &def_blk_aops;
353 inode->i_data.gfp_mask = GFP_USER;
354 inode->i_mode = S_IFBLK;
355 spin_lock(&bdev_lock);
356 bdev = bdfind(dev, head);
357 if (!bdev) {
358 list_add(&new_bdev->bd_hash, head);
359 spin_unlock(&bdev_lock);
360 return new_bdev;
361 }
362 spin_unlock(&bdev_lock);
363 iput(new_bdev->bd_inode);
364 }
365 destroy_bdev(new_bdev);
366 }
367 return bdev;
368}
369
/*
 * Detach @inode from its block device: unlink it from the i_devices list,
 * clear i_bdev and point i_mapping back at the inode's own i_data.
 * Does no locking; the callers in this file hold bdev_lock.
 */
static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}
376
377void bdput(struct block_device *bdev)
378{
379 if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
380 struct list_head *p;
381 if (bdev->bd_openers)
382 BUG();
383 list_del(&bdev->bd_hash);
384 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
385 __bd_forget(list_entry(p, struct inode, i_devices));
386 }
387 spin_unlock(&bdev_lock);
388 iput(bdev->bd_inode);
389 destroy_bdev(bdev);
390 }
391}
392
393int bd_acquire(struct inode *inode)
394{
395 struct block_device *bdev;
396 spin_lock(&bdev_lock);
397 if (inode->i_bdev) {
398 atomic_inc(&inode->i_bdev->bd_count);
399 spin_unlock(&bdev_lock);
400 return 0;
401 }
402 spin_unlock(&bdev_lock);
403 bdev = bdget(kdev_t_to_nr(inode->i_rdev));
404 if (!bdev)
405 return -ENOMEM;
406 spin_lock(&bdev_lock);
407 if (!inode->i_bdev) {
408 inode->i_bdev = bdev;
409 inode->i_mapping = bdev->bd_inode->i_mapping;
410 list_add(&inode->i_devices, &bdev->bd_inodes);
411 } else if (inode->i_bdev != bdev)
412 BUG();
413 spin_unlock(&bdev_lock);
414 return 0;
415}
416
417
418
/*
 * Detach @inode from its block device, if it is attached to one.
 * Takes bdev_lock around the check and the call to __bd_forget().
 */
void bd_forget(struct inode *inode)
{
	spin_lock(&bdev_lock);
	if (inode->i_bdev)
		__bd_forget(inode);
	spin_unlock(&bdev_lock);
}
426
/*
 * Table of registered block drivers, indexed by major number.  An entry is
 * in use when its bdops pointer is non-NULL (see register_blkdev() and
 * unregister_blkdev()).
 */
static struct {
	const char *name;
	struct block_device_operations *bdops;
} blkdevs[MAX_BLKDEV];
431
432int get_blkdev_list(char * p)
433{
434 int i;
435 int len;
436
437 len = sprintf(p, "\nBlock devices:\n");
438 for (i = 0; i < MAX_BLKDEV ; i++) {
439 if (blkdevs[i].bdops) {
440 len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
441 }
442 }
443 return len;
444}
445
446
447
448
449
450const struct block_device_operations * get_blkfops(unsigned int major)
451{
452 const struct block_device_operations *ret = NULL;
453
454
455 if (major && major < MAX_BLKDEV) {
456#ifdef CONFIG_KMOD
457 if (!blkdevs[major].bdops) {
458 char name[20];
459 sprintf(name, "block-major-%d", major);
460 request_module(name);
461 }
462#endif
463 ret = blkdevs[major].bdops;
464 }
465 return ret;
466}
467
468int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
469{
470 if (major == 0) {
471 for (major = MAX_BLKDEV-1; major > 0; major--) {
472 if (blkdevs[major].bdops == NULL) {
473 blkdevs[major].name = name;
474 blkdevs[major].bdops = bdops;
475 return major;
476 }
477 }
478 return -EBUSY;
479 }
480 if (major >= MAX_BLKDEV)
481 return -EINVAL;
482 if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
483 return -EBUSY;
484 blkdevs[major].name = name;
485 blkdevs[major].bdops = bdops;
486 return 0;
487}
488
489int unregister_blkdev(unsigned int major, const char * name)
490{
491 if (major >= MAX_BLKDEV)
492 return -EINVAL;
493 if (!blkdevs[major].bdops)
494 return -EINVAL;
495 if (strcmp(blkdevs[major].name, name))
496 return -EINVAL;
497 blkdevs[major].name = NULL;
498 blkdevs[major].bdops = NULL;
499 return 0;
500}
501
502
503
504
505
506
507
508
509
510
511int check_disk_change(kdev_t dev)
512{
513 int i;
514 const struct block_device_operations * bdops = NULL;
515
516 i = MAJOR(dev);
517 if (i < MAX_BLKDEV)
518 bdops = blkdevs[i].bdops;
519 if (bdops == NULL) {
520 devfs_handle_t de;
521
522 de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
523 DEVFS_SPECIAL_BLK, 0);
524 if (de) {
525 bdops = devfs_get_ops (de);
526 devfs_put_ops (de);
527 }
528 }
529 if (bdops == NULL)
530 return 0;
531 if (bdops->check_media_change == NULL)
532 return 0;
533 if (!bdops->check_media_change(dev))
534 return 0;
535
536 if (invalidate_device(dev, 0))
537 printk("VFS: busy inodes on changed media.\n");
538
539 if (bdops->revalidate)
540 bdops->revalidate(dev);
541 return 1;
542}
543
544int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
545{
546 int res;
547 mm_segment_t old_fs = get_fs();
548
549 if (!bdev->bd_op->ioctl)
550 return -EINVAL;
551 set_fs(KERNEL_DS);
552 res = bdev->bd_op->ioctl(bdev->bd_inode, NULL, cmd, arg);
553 set_fs(old_fs);
554 return res;
555}
556
/*
 * Common open path for block devices.
 *
 * Runs under bdev->bd_sem and the big kernel lock.  Looks up the driver
 * operations if the descriptor has none cached, then calls the driver's open
 * method (if any) with @inode and @file.
 *
 * Returns 0 on success, -ENXIO if no driver operations are found, or the
 * driver's open error.  On any failure the reference on @bdev is dropped
 * with bdput().
 */
static int do_open(struct block_device *bdev, struct inode *inode, struct file *file)
{
	int ret = -ENXIO;
	kdev_t dev = to_kdev_t(bdev->bd_dev);

	down(&bdev->bd_sem);
	lock_kernel();
	if (!bdev->bd_op)
		bdev->bd_op = get_blkfops(MAJOR(dev));
	if (bdev->bd_op) {
		ret = 0;
		/* Raise the owner's use count before calling into the driver. */
		if (bdev->bd_op->owner)
			__MOD_INC_USE_COUNT(bdev->bd_op->owner);
		if (bdev->bd_op->open)
			ret = bdev->bd_op->open(inode, file);
		if (!ret) {
			/* Success: count the opener and refresh size/blkbits. */
			bdev->bd_openers++;
			bdev->bd_inode->i_size = blkdev_size(dev);
			bdev->bd_inode->i_blkbits = blksize_bits(block_size(dev));
		} else {
			/* Failure: undo the use count; forget ops if nobody has it open. */
			if (bdev->bd_op->owner)
				__MOD_DEC_USE_COUNT(bdev->bd_op->owner);
			if (!bdev->bd_openers)
				bdev->bd_op = NULL;
		}
	}
	unlock_kernel();
	up(&bdev->bd_sem);
	if (ret)
		bdput(bdev);
	return ret;
}
589
590int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
591{
592
593
594
595
596
597
598 struct file fake_file = {};
599 struct dentry fake_dentry = {};
600 fake_file.f_mode = mode;
601 fake_file.f_flags = flags;
602 fake_file.f_dentry = &fake_dentry;
603 fake_dentry.d_inode = bdev->bd_inode;
604
605 return do_open(bdev, bdev->bd_inode, &fake_file);
606}
607
608int blkdev_open(struct inode * inode, struct file * filp)
609{
610 struct block_device *bdev;
611
612
613
614
615
616
617
618 filp->f_flags |= O_LARGEFILE;
619
620 bd_acquire(inode);
621 bdev = inode->i_bdev;
622
623 return do_open(bdev, inode, filp);
624}
625
/*
 * Release one open of @bdev.
 *
 * Runs under bdev->bd_sem and the big kernel lock.  @kind selects how data
 * is flushed first: BDEV_FILE uses __block_fsync() on the bdev inode,
 * BDEV_FS uses fsync_no_super(); other kinds flush nothing here.
 *
 * Returns the driver's release result (0 if it has no release method).
 * The reference on @bdev is dropped with bdput() before returning.
 */
int blkdev_put(struct block_device *bdev, int kind)
{
	int ret = 0;
	kdev_t rdev = to_kdev_t(bdev->bd_dev);
	struct inode *bd_inode = bdev->bd_inode;

	down(&bdev->bd_sem);
	lock_kernel();
	if (kind == BDEV_FILE)
		__block_fsync(bd_inode);
	else if (kind == BDEV_FS)
		fsync_no_super(rdev);
	/* Last opener gone: throw away cached buffers and pages. */
	if (!--bdev->bd_openers)
		kill_bdev(bdev);
	if (bdev->bd_op->release)
		ret = bdev->bd_op->release(bd_inode, NULL);
	if (bdev->bd_op->owner)
		__MOD_DEC_USE_COUNT(bdev->bd_op->owner);
	/* Forget the cached ops once nobody has the device open. */
	if (!bdev->bd_openers)
		bdev->bd_op = NULL;
	unlock_kernel();
	up(&bdev->bd_sem);
	bdput(bdev);
	return ret;
}
651
/* release file operation: blkdev_put() on the inode's bdev with BDEV_FILE; @filp is unused. */
int blkdev_close(struct inode * inode, struct file * filp)
{
	return blkdev_put(inode->i_bdev, BDEV_FILE);
}
656
657static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
658 unsigned long arg)
659{
660 if (inode->i_bdev->bd_op->ioctl)
661 return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
662 return -EINVAL;
663}
664
/* Address-space operations installed on block device inodes by bdget(). */
struct address_space_operations def_blk_aops = {
	readpage: blkdev_readpage,
	writepage: blkdev_writepage,
	sync_page: block_sync_page,
	prepare_write: blkdev_prepare_write,
	commit_write: blkdev_commit_write,
	direct_IO: blkdev_direct_IO,
};
673
/*
 * Default file operations for block devices: open/release/llseek/fsync/ioctl
 * are implemented in this file, read/write/mmap use the generic file
 * helpers.
 */
struct file_operations def_blk_fops = {
	open: blkdev_open,
	release: blkdev_close,
	llseek: block_llseek,
	read: generic_file_read,
	write: generic_file_write,
	mmap: generic_file_mmap,
	fsync: block_fsync,
	ioctl: blkdev_ioctl,
};
684
685const char * bdevname(kdev_t dev)
686{
687 static char buffer[32];
688 const char * name = blkdevs[MAJOR(dev)].name;
689
690 if (!name)
691 name = "unknown-block";
692
693 sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
694 return buffer;
695}
696