linux/block/genhd.c
<<
>>
Prefs
   1/*
   2 *  gendisk handling
   3 */
   4
   5#include <linux/module.h>
   6#include <linux/fs.h>
   7#include <linux/genhd.h>
   8#include <linux/kdev_t.h>
   9#include <linux/kernel.h>
  10#include <linux/blkdev.h>
  11#include <linux/init.h>
  12#include <linux/spinlock.h>
  13#include <linux/proc_fs.h>
  14#include <linux/seq_file.h>
  15#include <linux/slab.h>
  16#include <linux/kmod.h>
  17#include <linux/kobj_map.h>
  18#include <linux/buffer_head.h>
  19#include <linux/mutex.h>
  20#include <linux/idr.h>
  21
  22#include "blk.h"
  23
/* Serializes access to the major_names[] hash table; it is also handed
 * to kobj_map_init() as the lock for bdev_map.
 */
static DEFINE_MUTEX(block_class_lock);
#ifndef CONFIG_SYSFS_DEPRECATED
/* kobject of the top-level "block" directory, created in genhd_device_init() */
struct kobject *block_depr;
#endif

/* for extended dynamic devt allocation, currently only one major is used */
#define MAX_EXT_DEVT            (1 << MINORBITS)

/* For extended devt allocation.  ext_devt_mutex prevents look up
 * results from going away underneath its user.
 */
static DEFINE_MUTEX(ext_devt_mutex);
/* Maps an allocated extended-devt index to its struct hd_struct. */
static DEFINE_IDR(ext_devt_idr);

/* Forward declaration; the initialized definition follows disk_release(). */
static struct device_type disk_type;
  39
  40/**
  41 * disk_get_part - get partition
  42 * @disk: disk to look partition from
  43 * @partno: partition number
  44 *
  45 * Look for partition @partno from @disk.  If found, increment
  46 * reference count and return it.
  47 *
  48 * CONTEXT:
  49 * Don't care.
  50 *
  51 * RETURNS:
  52 * Pointer to the found partition on success, NULL if not found.
  53 */
  54struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
  55{
  56        struct hd_struct *part = NULL;
  57        struct disk_part_tbl *ptbl;
  58
  59        if (unlikely(partno < 0))
  60                return NULL;
  61
  62        rcu_read_lock();
  63
  64        ptbl = rcu_dereference(disk->part_tbl);
  65        if (likely(partno < ptbl->len)) {
  66                part = rcu_dereference(ptbl->part[partno]);
  67                if (part)
  68                        get_device(part_to_dev(part));
  69        }
  70
  71        rcu_read_unlock();
  72
  73        return part;
  74}
  75EXPORT_SYMBOL_GPL(disk_get_part);
  76
  77/**
  78 * disk_part_iter_init - initialize partition iterator
  79 * @piter: iterator to initialize
  80 * @disk: disk to iterate over
  81 * @flags: DISK_PITER_* flags
  82 *
  83 * Initialize @piter so that it iterates over partitions of @disk.
  84 *
  85 * CONTEXT:
  86 * Don't care.
  87 */
  88void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
  89                          unsigned int flags)
  90{
  91        struct disk_part_tbl *ptbl;
  92
  93        rcu_read_lock();
  94        ptbl = rcu_dereference(disk->part_tbl);
  95
  96        piter->disk = disk;
  97        piter->part = NULL;
  98
  99        if (flags & DISK_PITER_REVERSE)
 100                piter->idx = ptbl->len - 1;
 101        else if (flags & DISK_PITER_INCL_PART0)
 102                piter->idx = 0;
 103        else
 104                piter->idx = 1;
 105
 106        piter->flags = flags;
 107
 108        rcu_read_unlock();
 109}
 110EXPORT_SYMBOL_GPL(disk_part_iter_init);
 111
 112/**
 113 * disk_part_iter_next - proceed iterator to the next partition and return it
 114 * @piter: iterator of interest
 115 *
 116 * Proceed @piter to the next partition and return it.
 117 *
 118 * CONTEXT:
 119 * Don't care.
 120 */
 121struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 122{
 123        struct disk_part_tbl *ptbl;
 124        int inc, end;
 125
 126        /* put the last partition */
 127        disk_put_part(piter->part);
 128        piter->part = NULL;
 129
 130        /* get part_tbl */
 131        rcu_read_lock();
 132        ptbl = rcu_dereference(piter->disk->part_tbl);
 133
 134        /* determine iteration parameters */
 135        if (piter->flags & DISK_PITER_REVERSE) {
 136                inc = -1;
 137                if (piter->flags & DISK_PITER_INCL_PART0)
 138                        end = -1;
 139                else
 140                        end = 0;
 141        } else {
 142                inc = 1;
 143                end = ptbl->len;
 144        }
 145
 146        /* iterate to the next partition */
 147        for (; piter->idx != end; piter->idx += inc) {
 148                struct hd_struct *part;
 149
 150                part = rcu_dereference(ptbl->part[piter->idx]);
 151                if (!part)
 152                        continue;
 153                if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
 154                        continue;
 155
 156                get_device(part_to_dev(part));
 157                piter->part = part;
 158                piter->idx += inc;
 159                break;
 160        }
 161
 162        rcu_read_unlock();
 163
 164        return piter->part;
 165}
 166EXPORT_SYMBOL_GPL(disk_part_iter_next);
 167
 168/**
 169 * disk_part_iter_exit - finish up partition iteration
 170 * @piter: iter of interest
 171 *
 172 * Called when iteration is over.  Cleans up @piter.
 173 *
 174 * CONTEXT:
 175 * Don't care.
 176 */
 177void disk_part_iter_exit(struct disk_part_iter *piter)
 178{
 179        disk_put_part(piter->part);
 180        piter->part = NULL;
 181}
 182EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 183
 184/**
 185 * disk_map_sector_rcu - map sector to partition
 186 * @disk: gendisk of interest
 187 * @sector: sector to map
 188 *
 189 * Find out which partition @sector maps to on @disk.  This is
 190 * primarily used for stats accounting.
 191 *
 192 * CONTEXT:
 193 * RCU read locked.  The returned partition pointer is valid only
 194 * while preemption is disabled.
 195 *
 196 * RETURNS:
 197 * Found partition on success, part0 is returned if no partition matches
 198 */
 199struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 200{
 201        struct disk_part_tbl *ptbl;
 202        int i;
 203
 204        ptbl = rcu_dereference(disk->part_tbl);
 205
 206        for (i = 1; i < ptbl->len; i++) {
 207                struct hd_struct *part = rcu_dereference(ptbl->part[i]);
 208
 209                if (part && part->start_sect <= sector &&
 210                    sector < part->start_sect + part->nr_sects)
 211                        return part;
 212        }
 213        return &disk->part0;
 214}
 215EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
 216
/*
 * Can be deleted altogether. Later.
 *
 * Registry of block major numbers and their names.  major_names[] is a
 * hash table of singly linked chains, indexed by major_to_index().
 */
static struct blk_major_name {
        struct blk_major_name *next;    /* next entry in the same hash chain */
        int major;                      /* registered major number */
        char name[16];                  /* name copied in by register_blkdev() */
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 226
 227/* index in the above - for now: assume no multimajor ranges */
 228static inline int major_to_index(int major)
 229{
 230        return major % BLKDEV_MAJOR_HASH_SIZE;
 231}
 232
 233#ifdef CONFIG_PROC_FS
 234void blkdev_show(struct seq_file *seqf, off_t offset)
 235{
 236        struct blk_major_name *dp;
 237
 238        if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 239                mutex_lock(&block_class_lock);
 240                for (dp = major_names[offset]; dp; dp = dp->next)
 241                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 242                mutex_unlock(&block_class_lock);
 243        }
 244}
 245#endif /* CONFIG_PROC_FS */
 246
 247int register_blkdev(unsigned int major, const char *name)
 248{
 249        struct blk_major_name **n, *p;
 250        int index, ret = 0;
 251
 252        mutex_lock(&block_class_lock);
 253
 254        /* temporary */
 255        if (major == 0) {
 256                for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
 257                        if (major_names[index] == NULL)
 258                                break;
 259                }
 260
 261                if (index == 0) {
 262                        printk("register_blkdev: failed to get major for %s\n",
 263                               name);
 264                        ret = -EBUSY;
 265                        goto out;
 266                }
 267                major = index;
 268                ret = major;
 269        }
 270
 271        p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
 272        if (p == NULL) {
 273                ret = -ENOMEM;
 274                goto out;
 275        }
 276
 277        p->major = major;
 278        strlcpy(p->name, name, sizeof(p->name));
 279        p->next = NULL;
 280        index = major_to_index(major);
 281
 282        for (n = &major_names[index]; *n; n = &(*n)->next) {
 283                if ((*n)->major == major)
 284                        break;
 285        }
 286        if (!*n)
 287                *n = p;
 288        else
 289                ret = -EBUSY;
 290
 291        if (ret < 0) {
 292                printk("register_blkdev: cannot get major %d for %s\n",
 293                       major, name);
 294                kfree(p);
 295        }
 296out:
 297        mutex_unlock(&block_class_lock);
 298        return ret;
 299}
 300
 301EXPORT_SYMBOL(register_blkdev);
 302
 303void unregister_blkdev(unsigned int major, const char *name)
 304{
 305        struct blk_major_name **n;
 306        struct blk_major_name *p = NULL;
 307        int index = major_to_index(major);
 308
 309        mutex_lock(&block_class_lock);
 310        for (n = &major_names[index]; *n; n = &(*n)->next)
 311                if ((*n)->major == major)
 312                        break;
 313        if (!*n || strcmp((*n)->name, name)) {
 314                WARN_ON(1);
 315        } else {
 316                p = *n;
 317                *n = p->next;
 318        }
 319        mutex_unlock(&block_class_lock);
 320        kfree(p);
 321}
 322
 323EXPORT_SYMBOL(unregister_blkdev);
 324
/* dev_t -> kobject map for block devices; set up in genhd_device_init() */
static struct kobj_map *bdev_map;
 326
 327/**
 328 * blk_mangle_minor - scatter minor numbers apart
 329 * @minor: minor number to mangle
 330 *
 331 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 332 * is enabled.  Mangling twice gives the original value.
 333 *
 334 * RETURNS:
 335 * Mangled value.
 336 *
 337 * CONTEXT:
 338 * Don't care.
 339 */
 340static int blk_mangle_minor(int minor)
 341{
 342#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
 343        int i;
 344
 345        for (i = 0; i < MINORBITS / 2; i++) {
 346                int low = minor & (1 << i);
 347                int high = minor & (1 << (MINORBITS - 1 - i));
 348                int distance = MINORBITS - 1 - 2 * i;
 349
 350                minor ^= low | high;    /* clear both bits */
 351                low <<= distance;       /* swap the positions */
 352                high >>= distance;
 353                minor |= low | high;    /* and set */
 354        }
 355#endif
 356        return minor;
 357}
 358
 359/**
 360 * blk_alloc_devt - allocate a dev_t for a partition
 361 * @part: partition to allocate dev_t for
 362 * @devt: out parameter for resulting dev_t
 363 *
 364 * Allocate a dev_t for block device.
 365 *
 366 * RETURNS:
 367 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 368 * failure.
 369 *
 370 * CONTEXT:
 371 * Might sleep.
 372 */
 373int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 374{
 375        struct gendisk *disk = part_to_disk(part);
 376        int idx, rc;
 377
 378        /* in consecutive minor range? */
 379        if (part->partno < disk->minors) {
 380                *devt = MKDEV(disk->major, disk->first_minor + part->partno);
 381                return 0;
 382        }
 383
 384        /* allocate ext devt */
 385        do {
 386                if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
 387                        return -ENOMEM;
 388                rc = idr_get_new(&ext_devt_idr, part, &idx);
 389        } while (rc == -EAGAIN);
 390
 391        if (rc)
 392                return rc;
 393
 394        if (idx > MAX_EXT_DEVT) {
 395                idr_remove(&ext_devt_idr, idx);
 396                return -EBUSY;
 397        }
 398
 399        *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
 400        return 0;
 401}
 402
 403/**
 404 * blk_free_devt - free a dev_t
 405 * @devt: dev_t to free
 406 *
 407 * Free @devt which was allocated using blk_alloc_devt().
 408 *
 409 * CONTEXT:
 410 * Might sleep.
 411 */
 412void blk_free_devt(dev_t devt)
 413{
 414        might_sleep();
 415
 416        if (devt == MKDEV(0, 0))
 417                return;
 418
 419        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
 420                mutex_lock(&ext_devt_mutex);
 421                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 422                mutex_unlock(&ext_devt_mutex);
 423        }
 424}
 425
 426static char *bdevt_str(dev_t devt, char *buf)
 427{
 428        if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
 429                char tbuf[BDEVT_SIZE];
 430                snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
 431                snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
 432        } else
 433                snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
 434
 435        return buf;
 436}
 437
/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 *
 * Thin wrapper: all arguments are passed unchanged to kobj_map() on
 * the block device map bdev_map.
 */
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
                         struct kobject *(*probe)(dev_t, int *, void *),
                         int (*lock)(dev_t, void *), void *data)
{
        kobj_map(bdev_map, devt, range, module, probe, lock, data);
}

EXPORT_SYMBOL(blk_register_region);
 451
/*
 * Remove the device number range devt..(devt+range-1) from bdev_map
 * by calling kobj_unmap().
 */
void blk_unregister_region(dev_t devt, unsigned long range)
{
        kobj_unmap(bdev_map, devt, range);
}

EXPORT_SYMBOL(blk_unregister_region);
 458
/*
 * Probe callback registered by add_disk(): @data is the gendisk that
 * was registered, and its device kobject is returned.  @devt and
 * @partno are not used.
 */
static struct kobject *exact_match(dev_t devt, int *partno, void *data)
{
        struct gendisk *p = data;

        return &disk_to_dev(p)->kobj;
}
 465
 466static int exact_lock(dev_t devt, void *data)
 467{
 468        struct gendisk *p = data;
 469
 470        if (!get_disk(p))
 471                return -1;
 472        return 0;
 473}
 474
 475/**
 476 * add_disk - add partitioning information to kernel list
 477 * @disk: per-device partitioning information
 478 *
 479 * This function registers the partitioning information in @disk
 480 * with the kernel.
 481 *
 482 * FIXME: error handling
 483 */
 484void add_disk(struct gendisk *disk)
 485{
 486        struct backing_dev_info *bdi;
 487        dev_t devt;
 488        int retval;
 489
 490        /* minors == 0 indicates to use ext devt from part0 and should
 491         * be accompanied with EXT_DEVT flag.  Make sure all
 492         * parameters make sense.
 493         */
 494        WARN_ON(disk->minors && !(disk->major || disk->first_minor));
 495        WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
 496
 497        disk->flags |= GENHD_FL_UP;
 498
 499        retval = blk_alloc_devt(&disk->part0, &devt);
 500        if (retval) {
 501                WARN_ON(1);
 502                return;
 503        }
 504        disk_to_dev(disk)->devt = devt;
 505
 506        /* ->major and ->first_minor aren't supposed to be
 507         * dereferenced from here on, but set them just in case.
 508         */
 509        disk->major = MAJOR(devt);
 510        disk->first_minor = MINOR(devt);
 511
 512        blk_register_region(disk_devt(disk), disk->minors, NULL,
 513                            exact_match, exact_lock, disk);
 514        register_disk(disk);
 515        blk_register_queue(disk);
 516
 517        bdi = &disk->queue->backing_dev_info;
 518        bdi_register_dev(bdi, disk_devt(disk));
 519        retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 520                                   "bdi");
 521        WARN_ON(retval);
 522}
 523
 524EXPORT_SYMBOL(add_disk);
 525EXPORT_SYMBOL(del_gendisk);     /* in partitions/check.c */
 526
/*
 * Take @disk's registrations down again: remove the "bdi" sysfs link,
 * unregister the queue's backing_dev_info, unregister the queue and
 * release the device number region.  These are the counterparts of
 * the steps add_disk() performs, in reverse order.
 */
void unlink_gendisk(struct gendisk *disk)
{
        sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
        bdi_unregister(&disk->queue->backing_dev_info);
        blk_unregister_queue(disk);
        blk_unregister_region(disk_devt(disk), disk->minors);
}
 534
 535/**
 536 * get_gendisk - get partitioning information for a given device
 537 * @devt: device to get partitioning information for
 538 * @partno: returned partition index
 539 *
 540 * This function gets the structure containing partitioning
 541 * information for the given device @devt.
 542 */
 543struct gendisk *get_gendisk(dev_t devt, int *partno)
 544{
 545        struct gendisk *disk = NULL;
 546
 547        if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
 548                struct kobject *kobj;
 549
 550                kobj = kobj_lookup(bdev_map, devt, partno);
 551                if (kobj)
 552                        disk = dev_to_disk(kobj_to_dev(kobj));
 553        } else {
 554                struct hd_struct *part;
 555
 556                mutex_lock(&ext_devt_mutex);
 557                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 558                if (part && get_disk(part_to_disk(part))) {
 559                        *partno = part->partno;
 560                        disk = part_to_disk(part);
 561                }
 562                mutex_unlock(&ext_devt_mutex);
 563        }
 564
 565        return disk;
 566}
 567
 568/**
 569 * bdget_disk - do bdget() by gendisk and partition number
 570 * @disk: gendisk of interest
 571 * @partno: partition number
 572 *
 573 * Find partition @partno from @disk, do bdget() on it.
 574 *
 575 * CONTEXT:
 576 * Don't care.
 577 *
 578 * RETURNS:
 579 * Resulting block_device on success, NULL on failure.
 580 */
 581struct block_device *bdget_disk(struct gendisk *disk, int partno)
 582{
 583        struct hd_struct *part;
 584        struct block_device *bdev = NULL;
 585
 586        part = disk_get_part(disk, partno);
 587        if (part)
 588                bdev = bdget(part_devt(part));
 589        disk_put_part(part);
 590
 591        return bdev;
 592}
 593EXPORT_SYMBOL(bdget_disk);
 594
 595/*
 596 * print a full list of all partitions - intended for places where the root
 597 * filesystem can't be mounted and thus to give the victim some idea of what
 598 * went wrong
 599 */
 600void __init printk_all_partitions(void)
 601{
 602        struct class_dev_iter iter;
 603        struct device *dev;
 604
 605        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 606        while ((dev = class_dev_iter_next(&iter))) {
 607                struct gendisk *disk = dev_to_disk(dev);
 608                struct disk_part_iter piter;
 609                struct hd_struct *part;
 610                char name_buf[BDEVNAME_SIZE];
 611                char devt_buf[BDEVT_SIZE];
 612
 613                /*
 614                 * Don't show empty devices or things that have been
 615                 * surpressed
 616                 */
 617                if (get_capacity(disk) == 0 ||
 618                    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
 619                        continue;
 620
 621                /*
 622                 * Note, unlike /proc/partitions, I am showing the
 623                 * numbers in hex - the same format as the root=
 624                 * option takes.
 625                 */
 626                disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
 627                while ((part = disk_part_iter_next(&piter))) {
 628                        bool is_part0 = part == &disk->part0;
 629
 630                        printk("%s%s %10llu %s", is_part0 ? "" : "  ",
 631                               bdevt_str(part_devt(part), devt_buf),
 632                               (unsigned long long)part->nr_sects >> 1,
 633                               disk_name(disk, part->partno, name_buf));
 634                        if (is_part0) {
 635                                if (disk->driverfs_dev != NULL &&
 636                                    disk->driverfs_dev->driver != NULL)
 637                                        printk(" driver: %s\n",
 638                                              disk->driverfs_dev->driver->name);
 639                                else
 640                                        printk(" (driver?)\n");
 641                        } else
 642                                printk("\n");
 643                }
 644                disk_part_iter_exit(&piter);
 645        }
 646        class_dev_iter_exit(&iter);
 647}
 648
 649#ifdef CONFIG_PROC_FS
 650/* iterator */
 651static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 652{
 653        loff_t skip = *pos;
 654        struct class_dev_iter *iter;
 655        struct device *dev;
 656
 657        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 658        if (!iter)
 659                return ERR_PTR(-ENOMEM);
 660
 661        seqf->private = iter;
 662        class_dev_iter_init(iter, &block_class, NULL, &disk_type);
 663        do {
 664                dev = class_dev_iter_next(iter);
 665                if (!dev)
 666                        return NULL;
 667        } while (skip--);
 668
 669        return dev_to_disk(dev);
 670}
 671
 672static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 673{
 674        struct device *dev;
 675
 676        (*pos)++;
 677        dev = class_dev_iter_next(seqf->private);
 678        if (dev)
 679                return dev_to_disk(dev);
 680
 681        return NULL;
 682}
 683
 684static void disk_seqf_stop(struct seq_file *seqf, void *v)
 685{
 686        struct class_dev_iter *iter = seqf->private;
 687
 688        /* stop is called even after start failed :-( */
 689        if (iter) {
 690                class_dev_iter_exit(iter);
 691                kfree(iter);
 692        }
 693}
 694
 695static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 696{
 697        static void *p;
 698
 699        p = disk_seqf_start(seqf, pos);
 700        if (!IS_ERR(p) && p && !*pos)
 701                seq_puts(seqf, "major minor  #blocks  name\n\n");
 702        return p;
 703}
 704
 705static int show_partition(struct seq_file *seqf, void *v)
 706{
 707        struct gendisk *sgp = v;
 708        struct disk_part_iter piter;
 709        struct hd_struct *part;
 710        char buf[BDEVNAME_SIZE];
 711
 712        /* Don't show non-partitionable removeable devices or empty devices */
 713        if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
 714                                   (sgp->flags & GENHD_FL_REMOVABLE)))
 715                return 0;
 716        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 717                return 0;
 718
 719        /* show the full disk and all non-0 size partitions of it */
 720        disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
 721        while ((part = disk_part_iter_next(&piter)))
 722                seq_printf(seqf, "%4d  %7d %10llu %s\n",
 723                           MAJOR(part_devt(part)), MINOR(part_devt(part)),
 724                           (unsigned long long)part->nr_sects >> 1,
 725                           disk_name(sgp, part->partno, buf));
 726        disk_part_iter_exit(&piter);
 727
 728        return 0;
 729}
 730
/*
 * seq_file operations for the partitions listing: the shared disk
 * iterator callbacks, with a ->start wrapper that prints the header.
 */
static const struct seq_operations partitions_op = {
        .start  = show_partition_start,
        .next   = disk_seqf_next,
        .stop   = disk_seqf_stop,
        .show   = show_partition
};
 737
/* ->open handler: attach the partitions_op seq_file operations to @file. */
static int partitions_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &partitions_op);
}
 742
/* File operations for the partitions proc entry; reads go through seq_file. */
static const struct file_operations proc_partitions_operations = {
        .open           = partitions_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
 749#endif
 750
 751
 752static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 753{
 754        if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 755                /* Make old-style 2.4 aliases work */
 756                request_module("block-major-%d", MAJOR(devt));
 757        return NULL;
 758}
 759
/*
 * Subsystem init for the block layer's device bookkeeping: register
 * block_class, create the dev_t map, run blk_dev_init() and reserve
 * the extended block major.
 *
 * Returns the class_register() error if that fails, 0 otherwise.
 */
static int __init genhd_device_init(void)
{
        int error;

        block_class.dev_kobj = sysfs_dev_block_kobj;
        error = class_register(&block_class);
        if (unlikely(error))
                return error;
        /* NOTE(review): the results of kobj_map_init() and of
         * register_blkdev() below are not checked.
         */
        bdev_map = kobj_map_init(base_probe, &block_class_lock);
        blk_dev_init();

        register_blkdev(BLOCK_EXT_MAJOR, "blkext");

#ifndef CONFIG_SYSFS_DEPRECATED
        /* create top-level block dir */
        block_depr = kobject_create_and_add("block", NULL);
#endif
        return 0;
}

subsys_initcall(genhd_device_init);
 781
 782static ssize_t disk_range_show(struct device *dev,
 783                               struct device_attribute *attr, char *buf)
 784{
 785        struct gendisk *disk = dev_to_disk(dev);
 786
 787        return sprintf(buf, "%d\n", disk->minors);
 788}
 789
 790static ssize_t disk_ext_range_show(struct device *dev,
 791                                   struct device_attribute *attr, char *buf)
 792{
 793        struct gendisk *disk = dev_to_disk(dev);
 794
 795        return sprintf(buf, "%d\n", disk_max_parts(disk));
 796}
 797
 798static ssize_t disk_removable_show(struct device *dev,
 799                                   struct device_attribute *attr, char *buf)
 800{
 801        struct gendisk *disk = dev_to_disk(dev);
 802
 803        return sprintf(buf, "%d\n",
 804                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 805}
 806
 807static ssize_t disk_ro_show(struct device *dev,
 808                                   struct device_attribute *attr, char *buf)
 809{
 810        struct gendisk *disk = dev_to_disk(dev);
 811
 812        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 813}
 814
 815static ssize_t disk_capability_show(struct device *dev,
 816                                    struct device_attribute *attr, char *buf)
 817{
 818        struct gendisk *disk = dev_to_disk(dev);
 819
 820        return sprintf(buf, "%x\n", disk->flags);
 821}
 822
/*
 * Read-only sysfs attributes of a disk device.  "size" and "stat" use
 * the part_*_show handlers, which are not defined in this part of the
 * file.
 */
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* Spelled out with __ATTR because the attribute name contains dashes. */
static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
/* Owner-writable attribute named "io-timeout-fail". */
static struct device_attribute dev_attr_fail_timeout =
        __ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
                part_timeout_store);
#endif
 839
/* NULL-terminated list of the disk attributes declared above. */
static struct attribute *disk_attrs[] = {
        &dev_attr_range.attr,
        &dev_attr_ext_range.attr,
        &dev_attr_removable.attr,
        &dev_attr_ro.attr,
        &dev_attr_size.attr,
        &dev_attr_capability.attr,
        &dev_attr_stat.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
        &dev_attr_fail_timeout.attr,
#endif
        NULL
};

/* Unnamed attribute group wrapping disk_attrs. */
static struct attribute_group disk_attr_group = {
        .attrs = disk_attrs,
};

/* NULL-terminated group list referenced by disk_type.groups. */
static struct attribute_group *disk_attr_groups[] = {
        &disk_attr_group,
        NULL
};
 865
 866static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
 867{
 868        struct disk_part_tbl *ptbl =
 869                container_of(head, struct disk_part_tbl, rcu_head);
 870
 871        kfree(ptbl);
 872}
 873
 874/**
 875 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
 876 * @disk: disk to replace part_tbl for
 877 * @new_ptbl: new part_tbl to install
 878 *
 879 * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
 880 * original ptbl is freed using RCU callback.
 881 *
 882 * LOCKING:
 883 * Matching bd_mutx locked.
 884 */
 885static void disk_replace_part_tbl(struct gendisk *disk,
 886                                  struct disk_part_tbl *new_ptbl)
 887{
 888        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 889
 890        rcu_assign_pointer(disk->part_tbl, new_ptbl);
 891        if (old_ptbl)
 892                call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
 893}
 894
 895/**
 896 * disk_expand_part_tbl - expand disk->part_tbl
 897 * @disk: disk to expand part_tbl for
 898 * @partno: expand such that this partno can fit in
 899 *
 900 * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
 901 * uses RCU to allow unlocked dereferencing for stats and other stuff.
 902 *
 903 * LOCKING:
 904 * Matching bd_mutex locked, might sleep.
 905 *
 906 * RETURNS:
 907 * 0 on success, -errno on failure.
 908 */
 909int disk_expand_part_tbl(struct gendisk *disk, int partno)
 910{
 911        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 912        struct disk_part_tbl *new_ptbl;
 913        int len = old_ptbl ? old_ptbl->len : 0;
 914        int target = partno + 1;
 915        size_t size;
 916        int i;
 917
 918        /* disk_max_parts() is zero during initialization, ignore if so */
 919        if (disk_max_parts(disk) && target > disk_max_parts(disk))
 920                return -EINVAL;
 921
 922        if (target <= len)
 923                return 0;
 924
 925        size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
 926        new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
 927        if (!new_ptbl)
 928                return -ENOMEM;
 929
 930        INIT_RCU_HEAD(&new_ptbl->rcu_head);
 931        new_ptbl->len = target;
 932
 933        for (i = 0; i < len; i++)
 934                rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
 935
 936        disk_replace_part_tbl(disk, new_ptbl);
 937        return 0;
 938}
 939
/*
 * ->release callback of disk_type: free everything owned by the
 * gendisk embedding @dev, then the gendisk itself.
 */
static void disk_release(struct device *dev)
{
        struct gendisk *disk = dev_to_disk(dev);

        kfree(disk->random);
        /* installing NULL queues the current partition table for RCU freeing */
        disk_replace_part_tbl(disk, NULL);
        free_part_stats(&disk->part0);
        kfree(disk);
}
/* Device class named "block"; registered in genhd_device_init(). */
struct class block_class = {
        .name           = "block",
};
 952
/*
 * Device type for whole disks: provides the disk sysfs attribute
 * groups and the release hook, and is the type filter used by the
 * class_dev_iter walks in this file.
 */
static struct device_type disk_type = {
        .name           = "disk",
        .groups         = disk_attr_groups,
        .release        = disk_release,
};
 958
 959#ifdef CONFIG_PROC_FS
 960/*
 961 * aggregate disk stat collector.  Uses the same stats that the sysfs
 962 * entries do, above, but makes them available through one seq_file.
 963 *
 964 * The output looks suspiciously like /proc/partitions with a bunch of
 965 * extra fields.
 966 */
 967static int diskstats_show(struct seq_file *seqf, void *v)
 968{
 969        struct gendisk *gp = v;
 970        struct disk_part_iter piter;
 971        struct hd_struct *hd;
 972        char buf[BDEVNAME_SIZE];
 973        int cpu;
 974
 975        /*
 976        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
 977                seq_puts(seqf,  "major minor name"
 978                                "     rio rmerge rsect ruse wio wmerge "
 979                                "wsect wuse running use aveq"
 980                                "\n\n");
 981        */
 982 
 983        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
 984        while ((hd = disk_part_iter_next(&piter))) {
 985                cpu = part_stat_lock();
 986                part_round_stats(cpu, hd);
 987                part_stat_unlock();
 988                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
 989                           "%u %lu %lu %llu %u %u %u %u\n",
 990                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 991                           disk_name(gp, hd->partno, buf),
 992                           part_stat_read(hd, ios[0]),
 993                           part_stat_read(hd, merges[0]),
 994                           (unsigned long long)part_stat_read(hd, sectors[0]),
 995                           jiffies_to_msecs(part_stat_read(hd, ticks[0])),
 996                           part_stat_read(hd, ios[1]),
 997                           part_stat_read(hd, merges[1]),
 998                           (unsigned long long)part_stat_read(hd, sectors[1]),
 999                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1000                           hd->in_flight,
1001                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1002                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1003                        );
1004        }
1005        disk_part_iter_exit(&piter);
1006 
1007        return 0;
1008}
1009
/*
 * seq_file operations for the "diskstats" proc entry.  Iteration is
 * delegated to the disk_seqf_*() helpers defined elsewhere in this file;
 * diskstats_show() formats each element.
 */
static const struct seq_operations diskstats_op = {
        .start  = disk_seqf_start,
        .next   = disk_seqf_next,
        .stop   = disk_seqf_stop,
        .show   = diskstats_show
};
1016
/*
 * Open handler for the "diskstats" proc entry: attaches the diskstats_op
 * seq_file operations to @file and returns seq_open()'s result.
 */
static int diskstats_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &diskstats_op);
}
1021
/*
 * File operations for the "diskstats" proc entry.  Everything except
 * open is handled by the generic seq_file helpers.
 */
static const struct file_operations proc_diskstats_operations = {
        .open           = diskstats_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
1028
/*
 * Register the "diskstats" and "partitions" proc entries (NULL parent,
 * mode 0).  proc_partitions_operations is defined elsewhere in this file.
 *
 * The proc_create() results are not checked; this always returns 0.
 */
static int __init proc_genhd_init(void)
{
        proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
        proc_create("partitions", 0, NULL, &proc_partitions_operations);
        return 0;
}
module_init(proc_genhd_init);
1036#endif /* CONFIG_PROC_FS */
1037
1038static void media_change_notify_thread(struct work_struct *work)
1039{
1040        struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1041        char event[] = "MEDIA_CHANGE=1";
1042        char *envp[] = { event, NULL };
1043
1044        /*
1045         * set enviroment vars to indicate which event this is for
1046         * so that user space will know to go check the media status.
1047         */
1048        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1049        put_device(gd->driverfs_dev);
1050}
1051
/*
 * Compiled out.  When enabled, this takes a reference on
 * disk->driverfs_dev and queues disk->async_notify, whose handler
 * (media_change_notify_thread) emits the uevent and drops a reference
 * on the same device.
 */
#if 0
void genhd_media_change_notify(struct gendisk *disk)
{
        get_device(disk->driverfs_dev);
        schedule_work(&disk->async_notify);
}
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif  /*  0  */
1060
1061dev_t blk_lookup_devt(const char *name, int partno)
1062{
1063        dev_t devt = MKDEV(0, 0);
1064        struct class_dev_iter iter;
1065        struct device *dev;
1066
1067        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1068        while ((dev = class_dev_iter_next(&iter))) {
1069                struct gendisk *disk = dev_to_disk(dev);
1070                struct hd_struct *part;
1071
1072                if (strcmp(dev->bus_id, name))
1073                        continue;
1074
1075                part = disk_get_part(disk, partno);
1076                if (part) {
1077                        devt = part_devt(part);
1078                        disk_put_part(part);
1079                        break;
1080                }
1081                disk_put_part(part);
1082        }
1083        class_dev_iter_exit(&iter);
1084        return devt;
1085}
1086EXPORT_SYMBOL(blk_lookup_devt);
1087
/*
 * Allocate a gendisk for @minors minors.  Thin wrapper around
 * alloc_disk_node() that passes -1 as the node id.
 *
 * Returns whatever alloc_disk_node() returns: the new gendisk, or NULL
 * on failure.
 */
struct gendisk *alloc_disk(int minors)
{
        return alloc_disk_node(minors, -1);
}
EXPORT_SYMBOL(alloc_disk);
1093
1094struct gendisk *alloc_disk_node(int minors, int node_id)
1095{
1096        struct gendisk *disk;
1097
1098        disk = kmalloc_node(sizeof(struct gendisk),
1099                                GFP_KERNEL | __GFP_ZERO, node_id);
1100        if (disk) {
1101                if (!init_part_stats(&disk->part0)) {
1102                        kfree(disk);
1103                        return NULL;
1104                }
1105                disk->node_id = node_id;
1106                if (disk_expand_part_tbl(disk, 0)) {
1107                        free_part_stats(&disk->part0);
1108                        kfree(disk);
1109                        return NULL;
1110                }
1111                disk->part_tbl->part[0] = &disk->part0;
1112
1113                disk->minors = minors;
1114                rand_initialize_disk(disk);
1115                disk_to_dev(disk)->class = &block_class;
1116                disk_to_dev(disk)->type = &disk_type;
1117                device_initialize(disk_to_dev(disk));
1118                INIT_WORK(&disk->async_notify,
1119                        media_change_notify_thread);
1120        }
1121        return disk;
1122}
1123EXPORT_SYMBOL(alloc_disk_node);
1124
1125struct kobject *get_disk(struct gendisk *disk)
1126{
1127        struct module *owner;
1128        struct kobject *kobj;
1129
1130        if (!disk->fops)
1131                return NULL;
1132        owner = disk->fops->owner;
1133        if (owner && !try_module_get(owner))
1134                return NULL;
1135        kobj = kobject_get(&disk_to_dev(disk)->kobj);
1136        if (kobj == NULL) {
1137                module_put(owner);
1138                return NULL;
1139        }
1140        return kobj;
1141
1142}
1143
1144EXPORT_SYMBOL(get_disk);
1145
1146void put_disk(struct gendisk *disk)
1147{
1148        if (disk)
1149                kobject_put(&disk_to_dev(disk)->kobj);
1150}
1151
1152EXPORT_SYMBOL(put_disk);
1153
/*
 * Store @flag as the policy of the partition behind @bdev.  This is the
 * value bdev_read_only() later reports.  @bdev and bdev->bd_part are
 * dereferenced without a NULL check.
 */
void set_device_ro(struct block_device *bdev, int flag)
{
        bdev->bd_part->policy = flag;
}

EXPORT_SYMBOL(set_device_ro);
1160
1161void set_disk_ro(struct gendisk *disk, int flag)
1162{
1163        struct disk_part_iter piter;
1164        struct hd_struct *part;
1165
1166        disk_part_iter_init(&piter, disk,
1167                            DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
1168        while ((part = disk_part_iter_next(&piter)))
1169                part->policy = flag;
1170        disk_part_iter_exit(&piter);
1171}
1172
1173EXPORT_SYMBOL(set_disk_ro);
1174
1175int bdev_read_only(struct block_device *bdev)
1176{
1177        if (!bdev)
1178                return 0;
1179        return bdev->bd_part->policy;
1180}
1181
1182EXPORT_SYMBOL(bdev_read_only);
1183
/*
 * Sync and invalidate partition @partno of @disk.
 *
 * Looks up the block device with bdget_disk(); when one is returned it
 * is passed to fsync_bdev() and __invalidate_device() and then released
 * with bdput().
 *
 * Returns the result of __invalidate_device(), or 0 when bdget_disk()
 * returned NULL.  The result of fsync_bdev() is not examined.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
        struct block_device *bdev = bdget_disk(disk, partno);
        int ret;

        if (!bdev)
                return 0;

        fsync_bdev(bdev);
        ret = __invalidate_device(bdev);
        bdput(bdev);

        return ret;
}

EXPORT_SYMBOL(invalidate_partition);
1197