linux-bk/fs/block_dev.c
<<
>>
Prefs
   1/*
   2 *  linux/fs/block_dev.c
   3 *
   4 *  Copyright (C) 1991, 1992  Linus Torvalds
   5 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
   6 */
   7
   8#include <linux/config.h>
   9#include <linux/init.h>
  10#include <linux/mm.h>
  11#include <linux/fcntl.h>
  12#include <linux/slab.h>
  13#include <linux/kmod.h>
  14#include <linux/major.h>
  15#include <linux/devfs_fs_kernel.h>
  16#include <linux/smp_lock.h>
  17#include <linux/iobuf.h>
  18#include <linux/highmem.h>
  19#include <linux/blkdev.h>
  20#include <linux/module.h>
  21#include <linux/blkpg.h>
  22#include <linux/buffer_head.h>
  23#include <linux/mpage.h>
  24
  25#include <asm/uaccess.h>
  26
  27static sector_t max_block(struct block_device *bdev)
  28{
  29        sector_t retval = ~((sector_t)0);
  30        loff_t sz = bdev->bd_inode->i_size;
  31
  32        if (sz) {
  33                unsigned int size = block_size(bdev);
  34                unsigned int sizebits = blksize_bits(size);
  35                retval = (sz >> sizebits);
  36        }
  37        return retval;
  38}
  39
  40/* Kill _all_ buffers, dirty or not.. */
  41static void kill_bdev(struct block_device *bdev)
  42{
  43        invalidate_bdev(bdev, 1);
  44        truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
  45}       
  46
  47int set_blocksize(struct block_device *bdev, int size)
  48{
  49        int oldsize;
  50
  51        /* Size must be a power of two, and between 512 and PAGE_SIZE */
  52        if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
  53                return -EINVAL;
  54
  55        /* Size cannot be smaller than the size supported by the device */
  56        if (size < bdev_hardsect_size(bdev))
  57                return -EINVAL;
  58
  59        oldsize = bdev->bd_block_size;
  60        if (oldsize == size)
  61                return 0;
  62
  63        /* Ok, we're actually changing the blocksize.. */
  64        sync_blockdev(bdev);
  65        bdev->bd_block_size = size;
  66        bdev->bd_inode->i_blkbits = blksize_bits(size);
  67        kill_bdev(bdev);
  68        return 0;
  69}
  70
  71int sb_set_blocksize(struct super_block *sb, int size)
  72{
  73        int bits;
  74        if (set_blocksize(sb->s_bdev, size) < 0)
  75                return 0;
  76        sb->s_blocksize = size;
  77        for (bits = 9, size >>= 9; size >>= 1; bits++)
  78                ;
  79        sb->s_blocksize_bits = bits;
  80        return sb->s_blocksize;
  81}
  82
  83int sb_min_blocksize(struct super_block *sb, int size)
  84{
  85        int minsize = bdev_hardsect_size(sb->s_bdev);
  86        if (size < minsize)
  87                size = minsize;
  88        return sb_set_blocksize(sb, size);
  89}
  90
  91static int
  92blkdev_get_block(struct inode *inode, sector_t iblock,
  93                struct buffer_head *bh, int create)
  94{
  95        if (iblock >= max_block(inode->i_bdev))
  96                return -EIO;
  97
  98        bh->b_bdev = inode->i_bdev;
  99        bh->b_blocknr = iblock;
 100        set_buffer_mapped(bh);
 101        return 0;
 102}
 103
 104static int
 105blkdev_get_blocks(struct inode *inode, sector_t iblock,
 106                unsigned long max_blocks, struct buffer_head *bh, int create)
 107{
 108        if ((iblock + max_blocks) > max_block(inode->i_bdev))
 109                return -EIO;
 110
 111        bh->b_bdev = inode->i_bdev;
 112        bh->b_blocknr = iblock;
 113        bh->b_size = max_blocks << inode->i_blkbits;
 114        set_buffer_mapped(bh);
 115        return 0;
 116}
 117
 118static int
 119blkdev_direct_IO(int rw, struct inode *inode, const struct iovec *iov,
 120                        loff_t offset, unsigned long nr_segs)
 121{
 122        return generic_direct_IO(rw, inode, iov, offset,
 123                                nr_segs, blkdev_get_blocks);
 124}
 125
 126static int blkdev_writepage(struct page * page)
 127{
 128        return block_write_full_page(page, blkdev_get_block);
 129}
 130
 131static int blkdev_readpage(struct file * file, struct page * page)
 132{
 133        return block_read_full_page(page, blkdev_get_block);
 134}
 135
 136static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
 137{
 138        return block_prepare_write(page, from, to, blkdev_get_block);
 139}
 140
 141static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
 142{
 143        return block_commit_write(page, from, to);
 144}
 145
 146/*
 147 * private llseek:
 148 * for a block special file file->f_dentry->d_inode->i_size is zero
 149 * so we compute the size by hand (just as in block_read/write above)
 150 */
 151static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 152{
 153        /* ewww */
 154        loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size;
 155        loff_t retval;
 156
 157        lock_kernel();
 158
 159        switch (origin) {
 160                case 2:
 161                        offset += size;
 162                        break;
 163                case 1:
 164                        offset += file->f_pos;
 165        }
 166        retval = -EINVAL;
 167        if (offset >= 0 && offset <= size) {
 168                if (offset != file->f_pos) {
 169                        file->f_pos = offset;
 170                        file->f_version = ++event;
 171                }
 172                retval = offset;
 173        }
 174        unlock_kernel();
 175        return retval;
 176}
 177        
 178/*
 179 *      Filp may be NULL when we are called by an msync of a vma
 180 *      since the vma has no handle.
 181 */
 182 
 183static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
 184{
 185        struct inode * inode = dentry->d_inode;
 186
 187        return sync_blockdev(inode->i_bdev);
 188}
 189
 190/*
 191 * pseudo-fs
 192 */
 193
 194static struct super_block *bd_get_sb(struct file_system_type *fs_type,
 195        int flags, char *dev_name, void *data)
 196{
 197        return get_sb_pseudo(fs_type, "bdev:", NULL, 0x62646576);
 198}
 199
 200static struct file_system_type bd_type = {
 201        .name           = "bdev",
 202        .get_sb         = bd_get_sb,
 203        .kill_sb        = kill_anon_super,
 204};
 205
 206static struct vfsmount *bd_mnt;
 207struct super_block *blockdev_superblock;
 208
 209/*
 210 * bdev cache handling - shamelessly stolen from inode.c
 211 * We use smaller hashtable, though.
 212 */
 213
 214#define HASH_BITS       6
 215#define HASH_SIZE       (1UL << HASH_BITS)
 216#define HASH_MASK       (HASH_SIZE-1)
 217static struct list_head bdev_hashtable[HASH_SIZE];
 218static spinlock_t bdev_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 219static kmem_cache_t * bdev_cachep;
 220
 221#define alloc_bdev() \
 222         ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
 223#define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
 224
 225static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 226{
 227        struct block_device * bdev = (struct block_device *) foo;
 228
 229        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 230            SLAB_CTOR_CONSTRUCTOR)
 231        {
 232                memset(bdev, 0, sizeof(*bdev));
 233                sema_init(&bdev->bd_sem, 1);
 234                INIT_LIST_HEAD(&bdev->bd_inodes);
 235        }
 236}
 237
 238void __init bdev_cache_init(void)
 239{
 240        int i, err;
 241        struct list_head *head = bdev_hashtable;
 242
 243        i = HASH_SIZE;
 244        do {
 245                INIT_LIST_HEAD(head);
 246                head++;
 247                i--;
 248        } while (i);
 249
 250        bdev_cachep = kmem_cache_create("bdev_cache",
 251                                         sizeof(struct block_device),
 252                                         0, SLAB_HWCACHE_ALIGN, init_once,
 253                                         NULL);
 254        if (!bdev_cachep)
 255                panic("Cannot create bdev_cache SLAB cache");
 256        err = register_filesystem(&bd_type);
 257        if (err)
 258                panic("Cannot register bdev pseudo-fs");
 259        bd_mnt = kern_mount(&bd_type);
 260        err = PTR_ERR(bd_mnt);
 261        if (IS_ERR(bd_mnt))
 262                panic("Cannot create bdev pseudo-fs");
 263        blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
 264}
 265
 266/*
 267 * Most likely _very_ bad one - but then it's hardly critical for small
 268 * /dev and can be fixed when somebody will need really large one.
 269 */
 270static inline unsigned long hash(dev_t dev)
 271{
 272        unsigned long tmp = dev;
 273        tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
 274        return tmp & HASH_MASK;
 275}
 276
 277static struct block_device *bdfind(dev_t dev, struct list_head *head)
 278{
 279        struct list_head *p;
 280        struct block_device *bdev;
 281        list_for_each(p, head) {
 282                bdev = list_entry(p, struct block_device, bd_hash);
 283                if (bdev->bd_dev != dev)
 284                        continue;
 285                atomic_inc(&bdev->bd_count);
 286                return bdev;
 287        }
 288        return NULL;
 289}
 290
 291struct block_device *bdget(dev_t dev)
 292{
 293        struct list_head * head = bdev_hashtable + hash(dev);
 294        struct block_device *bdev, *new_bdev;
 295        spin_lock(&bdev_lock);
 296        bdev = bdfind(dev, head);
 297        spin_unlock(&bdev_lock);
 298        if (bdev)
 299                return bdev;
 300        new_bdev = alloc_bdev();
 301        if (new_bdev) {
 302                struct inode *inode = new_inode(bd_mnt->mnt_sb);
 303                if (inode) {
 304                        kdev_t kdev = to_kdev_t(dev);
 305
 306                        atomic_set(&new_bdev->bd_count,1);
 307                        new_bdev->bd_dev = dev;
 308                        new_bdev->bd_op = NULL;
 309                        new_bdev->bd_queue = NULL;
 310                        new_bdev->bd_contains = NULL;
 311                        new_bdev->bd_inode = inode;
 312                        new_bdev->bd_part_count = 0;
 313                        new_bdev->bd_invalidated = 0;
 314                        inode->i_mode = S_IFBLK;
 315                        inode->i_rdev = kdev;
 316                        inode->i_bdev = new_bdev;
 317                        inode->i_data.a_ops = &def_blk_aops;
 318                        inode->i_data.gfp_mask = GFP_USER;
 319                        inode->i_data.backing_dev_info = &default_backing_dev_info;
 320                        spin_lock(&bdev_lock);
 321                        bdev = bdfind(dev, head);
 322                        if (!bdev) {
 323                                list_add(&new_bdev->bd_hash, head);
 324                                spin_unlock(&bdev_lock);
 325                                return new_bdev;
 326                        }
 327                        spin_unlock(&bdev_lock);
 328                        iput(new_bdev->bd_inode);
 329                }
 330                destroy_bdev(new_bdev);
 331        }
 332        return bdev;
 333}
 334
 335long nr_blockdev_pages(void)
 336{
 337        long ret = 0;
 338        int i;
 339
 340        spin_lock(&bdev_lock);
 341        for (i = 0; i < ARRAY_SIZE(bdev_hashtable); i++) {
 342                struct list_head *head = &bdev_hashtable[i];
 343                struct list_head *lh;
 344
 345                if (head == NULL)
 346                        continue;
 347                list_for_each(lh, head) {
 348                        struct block_device *bdev;
 349
 350                        bdev = list_entry(lh, struct block_device, bd_hash);
 351                        ret += bdev->bd_inode->i_mapping->nrpages;
 352                }
 353        }
 354        spin_unlock(&bdev_lock);
 355        return ret;
 356}
 357
 358static inline void __bd_forget(struct inode *inode)
 359{
 360        list_del_init(&inode->i_devices);
 361        inode->i_bdev = NULL;
 362        inode->i_mapping = &inode->i_data;
 363}
 364
 365void bdput(struct block_device *bdev)
 366{
 367        if (atomic_dec_and_lock(&bdev->bd_count, &bdev_lock)) {
 368                struct list_head *p;
 369                if (bdev->bd_openers)
 370                        BUG();
 371                list_del(&bdev->bd_hash);
 372                while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 373                        __bd_forget(list_entry(p, struct inode, i_devices));
 374                }
 375                spin_unlock(&bdev_lock);
 376                iput(bdev->bd_inode);
 377                destroy_bdev(bdev);
 378        }
 379}
 380 
 381int bd_acquire(struct inode *inode)
 382{
 383        struct block_device *bdev;
 384        spin_lock(&bdev_lock);
 385        if (inode->i_bdev) {
 386                atomic_inc(&inode->i_bdev->bd_count);
 387                spin_unlock(&bdev_lock);
 388                return 0;
 389        }
 390        spin_unlock(&bdev_lock);
 391        bdev = bdget(kdev_t_to_nr(inode->i_rdev));
 392        if (!bdev)
 393                return -ENOMEM;
 394        spin_lock(&bdev_lock);
 395        if (!inode->i_bdev) {
 396                inode->i_bdev = bdev;
 397                inode->i_mapping = bdev->bd_inode->i_mapping;
 398                list_add(&inode->i_devices, &bdev->bd_inodes);
 399        } else if (inode->i_bdev != bdev)
 400                BUG();
 401        spin_unlock(&bdev_lock);
 402        return 0;
 403}
 404
 405/* Call when you free inode */
 406
 407void bd_forget(struct inode *inode)
 408{
 409        spin_lock(&bdev_lock);
 410        if (inode->i_bdev)
 411                __bd_forget(inode);
 412        spin_unlock(&bdev_lock);
 413}
 414
 415int bd_claim(struct block_device *bdev, void *holder)
 416{
 417        int res = -EBUSY;
 418        spin_lock(&bdev_lock);
 419        if (!bdev->bd_holder || bdev->bd_holder == holder) {
 420                bdev->bd_holder = holder;
 421                bdev->bd_holders++;
 422                res = 0;
 423        }
 424        spin_unlock(&bdev_lock);
 425        return res;
 426}
 427
 428void bd_release(struct block_device *bdev)
 429{
 430        spin_lock(&bdev_lock);
 431        if (!--bdev->bd_holders)
 432                bdev->bd_holder = NULL;
 433        spin_unlock(&bdev_lock);
 434}
 435
 436static struct {
 437        const char *name;
 438        struct block_device_operations *bdops;
 439} blkdevs[MAX_BLKDEV];
 440
 441int get_blkdev_list(char * p)
 442{
 443        int i;
 444        int len;
 445
 446        len = sprintf(p, "\nBlock devices:\n");
 447        for (i = 0; i < MAX_BLKDEV ; i++) {
 448                if (blkdevs[i].bdops) {
 449                        len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
 450                }
 451        }
 452        return len;
 453}
 454
 455/*
 456        Return the function table of a device.
 457        Load the driver if needed.
 458*/
 459struct block_device_operations * get_blkfops(unsigned int major)
 460{
 461        struct block_device_operations *ret = NULL;
 462
 463        /* major 0 is used for non-device mounts */
 464        if (major && major < MAX_BLKDEV) {
 465#ifdef CONFIG_KMOD
 466                if (!blkdevs[major].bdops) {
 467                        char name[20];
 468                        sprintf(name, "block-major-%d", major);
 469                        request_module(name);
 470                }
 471#endif
 472                ret = blkdevs[major].bdops;
 473        }
 474        return ret;
 475}
 476
 477int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
 478{
 479        if (devfs_only())
 480                return 0;
 481        if (major == 0) {
 482                for (major = MAX_BLKDEV-1; major > 0; major--) {
 483                        if (blkdevs[major].bdops == NULL) {
 484                                blkdevs[major].name = name;
 485                                blkdevs[major].bdops = bdops;
 486                                return major;
 487                        }
 488                }
 489                return -EBUSY;
 490        }
 491        if (major >= MAX_BLKDEV)
 492                return -EINVAL;
 493        if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
 494                return -EBUSY;
 495        blkdevs[major].name = name;
 496        blkdevs[major].bdops = bdops;
 497        return 0;
 498}
 499
 500int unregister_blkdev(unsigned int major, const char * name)
 501{
 502        if (devfs_only())
 503                return 0;
 504        if (major >= MAX_BLKDEV)
 505                return -EINVAL;
 506        if (!blkdevs[major].bdops)
 507                return -EINVAL;
 508        if (strcmp(blkdevs[major].name, name))
 509                return -EINVAL;
 510        blkdevs[major].name = NULL;
 511        blkdevs[major].bdops = NULL;
 512        return 0;
 513}
 514
 515/*
 516 * This routine checks whether a removable media has been changed,
 517 * and invalidates all buffer-cache-entries in that case. This
 518 * is a relatively slow routine, so we have to try to minimize using
 519 * it. Thus it is called only upon a 'mount' or 'open'. This
 520 * is the best way of combining speed and utility, I think.
 521 * People changing diskettes in the middle of an operation deserve
 522 * to lose :-)
 523 */
 524int check_disk_change(struct block_device *bdev)
 525{
 526        struct block_device_operations * bdops = bdev->bd_op;
 527        kdev_t dev = to_kdev_t(bdev->bd_dev);
 528        struct gendisk *disk;
 529
 530        if (bdops->check_media_change == NULL)
 531                return 0;
 532        if (!bdops->check_media_change(dev))
 533                return 0;
 534
 535        if (invalidate_device(dev, 0))
 536                printk("VFS: busy inodes on changed media.\n");
 537
 538        disk = get_gendisk(dev);
 539        if (bdops->revalidate)
 540                bdops->revalidate(dev);
 541        if (disk && disk->minor_shift)
 542                bdev->bd_invalidated = 1;
 543        return 1;
 544}
 545
 546int full_check_disk_change(struct block_device *bdev)
 547{
 548        int res = 0;
 549        if (bdev->bd_contains != bdev)
 550                BUG();
 551        down(&bdev->bd_sem);
 552        if (check_disk_change(bdev)) {
 553                rescan_partitions(get_gendisk(to_kdev_t(bdev->bd_dev)), bdev);
 554                res = 1;
 555        }
 556        up(&bdev->bd_sem);
 557        return res;
 558}
 559
 560/*
 561 * Will die as soon as two remaining callers get converted.
 562 */
 563int __check_disk_change(dev_t dev)
 564{
 565        struct block_device *bdev = bdget(dev);
 566        int res;
 567        if (!bdev)
 568                return 0;
 569        if (blkdev_get(bdev, FMODE_READ, 0, BDEV_RAW) < 0)
 570                return 0;
 571        res = full_check_disk_change(bdev);
 572        blkdev_put(bdev, BDEV_RAW);
 573        return res;
 574}
 575
 576static void bd_set_size(struct block_device *bdev, loff_t size)
 577{
 578        unsigned bsize = bdev_hardsect_size(bdev);
 579        bdev->bd_inode->i_size = size;
 580        while (bsize < PAGE_CACHE_SIZE) {
 581                if (size & bsize)
 582                        break;
 583                bsize <<= 1;
 584        }
 585        bdev->bd_block_size = bsize;
 586        bdev->bd_inode->i_blkbits = blksize_bits(bsize);
 587}
 588
 589static int do_open(struct block_device *bdev, struct inode *inode, struct file *file)
 590{
 591        int ret = -ENXIO;
 592        kdev_t dev = to_kdev_t(bdev->bd_dev);
 593        struct module *owner = NULL;
 594        struct block_device_operations *ops, *old;
 595
 596        lock_kernel();
 597        ops = get_blkfops(major(dev));
 598        if (ops) {
 599                owner = ops->owner;
 600                if (owner)
 601                        __MOD_INC_USE_COUNT(owner);
 602        }
 603
 604        down(&bdev->bd_sem);
 605        old = bdev->bd_op;
 606        if (!old) {
 607                if (!ops)
 608                        goto out;
 609                bdev->bd_op = ops;
 610        } else {
 611                if (owner)
 612                        __MOD_DEC_USE_COUNT(owner);
 613        }
 614        if (!bdev->bd_contains) {
 615                unsigned minor = minor(dev);
 616                struct gendisk *g = get_gendisk(dev);
 617                bdev->bd_contains = bdev;
 618                if (g) {
 619                        unsigned minor0 = g->first_minor;
 620                        if (minor != minor0) {
 621                                struct block_device *disk;
 622                                disk = bdget(MKDEV(major(dev), minor0));
 623                                ret = -ENOMEM;
 624                                if (!disk)
 625                                        goto out1;
 626                                ret = blkdev_get(disk, file->f_mode, file->f_flags, BDEV_RAW);
 627                                if (ret)
 628                                        goto out1;
 629                                bdev->bd_contains = disk;
 630                        }
 631                }
 632        }
 633        if (bdev->bd_contains == bdev) {
 634                struct gendisk *g = get_gendisk(dev);
 635
 636                if (!bdev->bd_queue) {
 637                        struct blk_dev_struct *p = blk_dev + major(dev);
 638                        bdev->bd_queue = &p->request_queue;
 639                        if (p->queue)
 640                                bdev->bd_queue =  p->queue(dev);
 641                }
 642
 643                if (bdev->bd_op->open) {
 644                        ret = bdev->bd_op->open(inode, file);
 645                        if (ret)
 646                                goto out2;
 647                }
 648                if (!bdev->bd_openers) {
 649                        struct backing_dev_info *bdi;
 650                        sector_t sect = 0;
 651
 652                        bdev->bd_offset = 0;
 653                        if (g)
 654                                sect = get_capacity(g);
 655                        else if (blk_size[major(dev)])
 656                                sect = blk_size[major(dev)][minor(dev)] << 1;
 657                        bd_set_size(bdev, (loff_t)sect << 9);
 658                        bdi = blk_get_backing_dev_info(bdev);
 659                        if (bdi == NULL)
 660                                bdi = &default_backing_dev_info;
 661                        inode->i_data.backing_dev_info = bdi;
 662                        bdev->bd_inode->i_data.backing_dev_info = bdi;
 663                }
 664                if (bdev->bd_invalidated)
 665                        rescan_partitions(g, bdev);
 666        } else {
 667                down(&bdev->bd_contains->bd_sem);
 668                bdev->bd_contains->bd_part_count++;
 669                if (!bdev->bd_openers) {
 670                        struct gendisk *g = get_gendisk(dev);
 671                        struct hd_struct *p;
 672                        p = g->part + minor(dev) - g->first_minor - 1;
 673                        inode->i_data.backing_dev_info =
 674                           bdev->bd_inode->i_data.backing_dev_info =
 675                           bdev->bd_contains->bd_inode->i_data.backing_dev_info;
 676                        if (!p->nr_sects) {
 677                                bdev->bd_contains->bd_part_count--;
 678                                up(&bdev->bd_contains->bd_sem);
 679                                ret = -ENXIO;
 680                                goto out2;
 681                        }
 682                        bdev->bd_queue = bdev->bd_contains->bd_queue;
 683                        bdev->bd_offset = p->start_sect;
 684                        bd_set_size(bdev, (loff_t) p->nr_sects << 9);
 685                }
 686                up(&bdev->bd_contains->bd_sem);
 687        }
 688        bdev->bd_openers++;
 689        up(&bdev->bd_sem);
 690        unlock_kernel();
 691        return 0;
 692
 693out2:
 694        if (!bdev->bd_openers) {
 695                bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 696                if (bdev != bdev->bd_contains) {
 697                        blkdev_put(bdev->bd_contains, BDEV_RAW);
 698                        bdev->bd_contains = NULL;
 699                }
 700        }
 701out1:
 702        if (!old) {
 703                bdev->bd_op = NULL;
 704                if (owner)
 705                        __MOD_DEC_USE_COUNT(owner);
 706        }
 707out:
 708        up(&bdev->bd_sem);
 709        unlock_kernel();
 710        if (ret)
 711                bdput(bdev);
 712        return ret;
 713}
 714
 715int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
 716{
 717        /*
 718         * This crockload is due to bad choice of ->open() type.
 719         * It will go away.
 720         * For now, block device ->open() routine must _not_
 721         * examine anything in 'inode' argument except ->i_rdev.
 722         */
 723        struct file fake_file = {};
 724        struct dentry fake_dentry = {};
 725        fake_file.f_mode = mode;
 726        fake_file.f_flags = flags;
 727        fake_file.f_dentry = &fake_dentry;
 728        fake_dentry.d_inode = bdev->bd_inode;
 729
 730        return do_open(bdev, bdev->bd_inode, &fake_file);
 731}
 732
 733int blkdev_open(struct inode * inode, struct file * filp)
 734{
 735        struct block_device *bdev;
 736
 737        /*
 738         * Preserve backwards compatibility and allow large file access
 739         * even if userspace doesn't ask for it explicitly. Some mkfs
 740         * binary needs it. We might want to drop this workaround
 741         * during an unstable branch.
 742         */
 743        filp->f_flags |= O_LARGEFILE;
 744
 745        bd_acquire(inode);
 746        bdev = inode->i_bdev;
 747
 748        return do_open(bdev, inode, filp);
 749}       
 750
 751int blkdev_put(struct block_device *bdev, int kind)
 752{
 753        int ret = 0;
 754        struct inode *bd_inode = bdev->bd_inode;
 755
 756        down(&bdev->bd_sem);
 757        lock_kernel();
 758        switch (kind) {
 759        case BDEV_FILE:
 760        case BDEV_FS:
 761                sync_blockdev(bd_inode->i_bdev);
 762                break;
 763        }
 764        if (!--bdev->bd_openers)
 765                kill_bdev(bdev);
 766        if (bdev->bd_contains == bdev) {
 767                if (bdev->bd_op->release)
 768                        ret = bdev->bd_op->release(bd_inode, NULL);
 769        } else {
 770                down(&bdev->bd_contains->bd_sem);
 771                bdev->bd_contains->bd_part_count--;
 772                up(&bdev->bd_contains->bd_sem);
 773        }
 774        if (!bdev->bd_openers) {
 775                if (bdev->bd_op->owner)
 776                        __MOD_DEC_USE_COUNT(bdev->bd_op->owner);
 777                bdev->bd_op = NULL;
 778                bdev->bd_queue = NULL;
 779                bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
 780                if (bdev != bdev->bd_contains) {
 781                        blkdev_put(bdev->bd_contains, BDEV_RAW);
 782                        bdev->bd_contains = NULL;
 783                }
 784        }
 785        unlock_kernel();
 786        up(&bdev->bd_sem);
 787        bdput(bdev);
 788        return ret;
 789}
 790
 791int blkdev_close(struct inode * inode, struct file * filp)
 792{
 793        return blkdev_put(inode->i_bdev, BDEV_FILE);
 794}
 795
 796static int blkdev_reread_part(struct block_device *bdev)
 797{
 798        kdev_t dev = to_kdev_t(bdev->bd_dev);
 799        struct gendisk *disk = get_gendisk(dev);
 800        int res = 0;
 801
 802        if (!disk || !disk->minor_shift || bdev != bdev->bd_contains)
 803                return -EINVAL;
 804        if (!capable(CAP_SYS_ADMIN))
 805                return -EACCES;
 806        if (down_trylock(&bdev->bd_sem))
 807                return -EBUSY;
 808        res = rescan_partitions(disk, bdev);
 809        up(&bdev->bd_sem);
 810        return res;
 811}
 812
 813static ssize_t blkdev_file_write(struct file *file, const char *buf,
 814                                   size_t count, loff_t *ppos)
 815{
 816        struct iovec local_iov = { .iov_base = (void *)buf, .iov_len = count };
 817
 818        return generic_file_write_nolock(file, &local_iov, 1, ppos);
 819}
 820
 821static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
 822                        unsigned long arg)
 823{
 824        struct block_device *bdev = inode->i_bdev;
 825        int ret = -EINVAL;
 826        switch (cmd) {
 827        /*
 828         * deprecated, use the /proc/iosched interface instead
 829         */
 830        case BLKELVGET:
 831        case BLKELVSET:
 832                ret = -ENOTTY;
 833                break;
 834        case BLKRAGET:
 835        case BLKROGET:
 836        case BLKBSZGET:
 837        case BLKSSZGET:
 838        case BLKFRAGET:
 839        case BLKSECTGET:
 840        case BLKRASET:
 841        case BLKFRASET:
 842        case BLKBSZSET:
 843        case BLKPG:
 844                ret = blk_ioctl(bdev, cmd, arg);
 845                break;
 846        case BLKRRPART:
 847                ret = blkdev_reread_part(bdev);
 848                break;
 849        default:
 850                if (bdev->bd_op->ioctl)
 851                        ret =bdev->bd_op->ioctl(inode, file, cmd, arg);
 852                if (ret == -EINVAL) {
 853                        switch (cmd) {
 854                                case BLKGETSIZE:
 855                                case BLKGETSIZE64:
 856                                case BLKFLSBUF:
 857                                case BLKROSET:
 858                                        ret = blk_ioctl(bdev,cmd,arg);
 859                                        break;
 860                        }
 861                }
 862        }
 863        return ret;
 864}
 865
 866struct address_space_operations def_blk_aops = {
 867        .readpage       = blkdev_readpage,
 868        .writepage      = blkdev_writepage,
 869        .sync_page      = block_sync_page,
 870        .prepare_write  = blkdev_prepare_write,
 871        .commit_write   = blkdev_commit_write,
 872        .writepages     = generic_writepages,
 873        .vm_writeback   = generic_vm_writeback,
 874        .direct_IO      = blkdev_direct_IO,
 875};
 876
 877struct file_operations def_blk_fops = {
 878        .open           = blkdev_open,
 879        .release        = blkdev_close,
 880        .llseek         = block_llseek,
 881        .read           = generic_file_read,
 882        .write          = blkdev_file_write,
 883        .mmap           = generic_file_mmap,
 884        .fsync          = block_fsync,
 885        .ioctl          = blkdev_ioctl,
 886        .readv          = generic_file_readv,
 887        .writev         = generic_file_writev,
 888        .sendfile       = generic_file_sendfile,
 889};
 890
 891int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
 892{
 893        int res;
 894        mm_segment_t old_fs = get_fs();
 895        set_fs(KERNEL_DS);
 896        res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg);
 897        set_fs(old_fs);
 898        return res;
 899}
 900
 901const char *__bdevname(kdev_t dev)
 902{
 903        static char buffer[32];
 904        const char * name = blkdevs[major(dev)].name;
 905
 906        if (!name)
 907                name = "unknown-block";
 908
 909        sprintf(buffer, "%s(%d,%d)", name, major(dev), minor(dev));
 910        return buffer;
 911}
 912
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.