linux/block/genhd.c
<<
>>
Prefs
   1/*
   2 *  gendisk handling
   3 */
   4
   5#include <linux/module.h>
   6#include <linux/fs.h>
   7#include <linux/genhd.h>
   8#include <linux/kdev_t.h>
   9#include <linux/kernel.h>
  10#include <linux/blkdev.h>
  11#include <linux/init.h>
  12#include <linux/spinlock.h>
  13#include <linux/proc_fs.h>
  14#include <linux/seq_file.h>
  15#include <linux/slab.h>
  16#include <linux/kmod.h>
  17#include <linux/kobj_map.h>
  18#include <linux/buffer_head.h>
  19#include <linux/mutex.h>
  20#include <linux/idr.h>
  21
  22#include "blk.h"
  23
/* Taken around every major_names[] access in this file; also handed to kobj_map_init(). */
  24static DEFINE_MUTEX(block_class_lock);
  25#ifndef CONFIG_SYSFS_DEPRECATED
/* Set by genhd_device_init() to the kobject it creates under the name "block". */
  26struct kobject *block_depr;
  27#endif
  28
  29/* for extended dynamic devt allocation, currently only one major is used */
  30#define MAX_EXT_DEVT            (1 << MINORBITS)
  31
  32/* For extended devt allocation.  ext_devt_mutex prevents look up
  33 * results from going away underneath its user.
  34 */
  35static DEFINE_MUTEX(ext_devt_mutex);
/* Maps an extended devt index to the struct hd_struct stored by blk_alloc_devt(). */
  36static DEFINE_IDR(ext_devt_idr);
  37
/* Forward declaration; the initialised definition appears further down in this file. */
  38static struct device_type disk_type;
  39
  40/**
  41 * disk_get_part - get partition
  42 * @disk: disk to look partition from
  43 * @partno: partition number
  44 *
  45 * Look for partition @partno from @disk.  If found, increment
  46 * reference count and return it.
  47 *
  48 * CONTEXT:
  49 * Don't care.
  50 *
  51 * RETURNS:
  52 * Pointer to the found partition on success, NULL if not found.
  53 */
  54struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
  55{
  56        struct hd_struct *part = NULL;
  57        struct disk_part_tbl *ptbl;
  58
  59        if (unlikely(partno < 0))
  60                return NULL;
  61
  62        rcu_read_lock();
  63
  64        ptbl = rcu_dereference(disk->part_tbl);
  65        if (likely(partno < ptbl->len)) {
  66                part = rcu_dereference(ptbl->part[partno]);
  67                if (part)
  68                        get_device(part_to_dev(part));
  69        }
  70
  71        rcu_read_unlock();
  72
  73        return part;
  74}
  75EXPORT_SYMBOL_GPL(disk_get_part);
  76
  77/**
  78 * disk_part_iter_init - initialize partition iterator
  79 * @piter: iterator to initialize
  80 * @disk: disk to iterate over
  81 * @flags: DISK_PITER_* flags
  82 *
  83 * Initialize @piter so that it iterates over partitions of @disk.
  84 *
  85 * CONTEXT:
  86 * Don't care.
  87 */
  88void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
  89                          unsigned int flags)
  90{
  91        struct disk_part_tbl *ptbl;
  92
  93        rcu_read_lock();
  94        ptbl = rcu_dereference(disk->part_tbl);
  95
  96        piter->disk = disk;
  97        piter->part = NULL;
  98
  99        if (flags & DISK_PITER_REVERSE)
 100                piter->idx = ptbl->len - 1;
 101        else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
 102                piter->idx = 0;
 103        else
 104                piter->idx = 1;
 105
 106        piter->flags = flags;
 107
 108        rcu_read_unlock();
 109}
 110EXPORT_SYMBOL_GPL(disk_part_iter_init);
 111
 112/**
 113 * disk_part_iter_next - proceed iterator to the next partition and return it
 114 * @piter: iterator of interest
 115 *
 116 * Proceed @piter to the next partition and return it.
 117 *
 118 * CONTEXT:
 119 * Don't care.
 120 */
 121struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 122{
 123        struct disk_part_tbl *ptbl;
 124        int inc, end;
 125
 126        /* put the last partition */
 127        disk_put_part(piter->part);
 128        piter->part = NULL;
 129
 130        /* get part_tbl */
 131        rcu_read_lock();
 132        ptbl = rcu_dereference(piter->disk->part_tbl);
 133
 134        /* determine iteration parameters */
 135        if (piter->flags & DISK_PITER_REVERSE) {
 136                inc = -1;
 137                if (piter->flags & (DISK_PITER_INCL_PART0 |
 138                                    DISK_PITER_INCL_EMPTY_PART0))
 139                        end = -1;
 140                else
 141                        end = 0;
 142        } else {
 143                inc = 1;
 144                end = ptbl->len;
 145        }
 146
 147        /* iterate to the next partition */
 148        for (; piter->idx != end; piter->idx += inc) {
 149                struct hd_struct *part;
 150
 151                part = rcu_dereference(ptbl->part[piter->idx]);
 152                if (!part)
 153                        continue;
 154                if (!part->nr_sects &&
 155                    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
 156                    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
 157                      piter->idx == 0))
 158                        continue;
 159
 160                get_device(part_to_dev(part));
 161                piter->part = part;
 162                piter->idx += inc;
 163                break;
 164        }
 165
 166        rcu_read_unlock();
 167
 168        return piter->part;
 169}
 170EXPORT_SYMBOL_GPL(disk_part_iter_next);
 171
 172/**
 173 * disk_part_iter_exit - finish up partition iteration
 174 * @piter: iter of interest
 175 *
 176 * Called when iteration is over.  Cleans up @piter.
 177 *
 178 * CONTEXT:
 179 * Don't care.
 180 */
 181void disk_part_iter_exit(struct disk_part_iter *piter)
 182{
 183        disk_put_part(piter->part);
 184        piter->part = NULL;
 185}
 186EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 187
 188/**
 189 * disk_map_sector_rcu - map sector to partition
 190 * @disk: gendisk of interest
 191 * @sector: sector to map
 192 *
 193 * Find out which partition @sector maps to on @disk.  This is
 194 * primarily used for stats accounting.
 195 *
 196 * CONTEXT:
 197 * RCU read locked.  The returned partition pointer is valid only
 198 * while preemption is disabled.
 199 *
 200 * RETURNS:
 201 * Found partition on success, part0 is returned if no partition matches
 202 */
 203struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 204{
 205        struct disk_part_tbl *ptbl;
 206        int i;
 207
 208        ptbl = rcu_dereference(disk->part_tbl);
 209
 210        for (i = 1; i < ptbl->len; i++) {
 211                struct hd_struct *part = rcu_dereference(ptbl->part[i]);
 212
 213                if (part && part->start_sect <= sector &&
 214                    sector < part->start_sect + part->nr_sects)
 215                        return part;
 216        }
 217        return &disk->part0;
 218}
 219EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
 220
 221/*
 222 * Can be deleted altogether. Later.
 223 *
 224 */
 225static struct blk_major_name {
 226        struct blk_major_name *next;
 227        int major;
 228        char name[16];
 229} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 230
 231/* index in the above - for now: assume no multimajor ranges */
 232static inline int major_to_index(int major)
 233{
 234        return major % BLKDEV_MAJOR_HASH_SIZE;
 235}
 236
 237#ifdef CONFIG_PROC_FS
 238void blkdev_show(struct seq_file *seqf, off_t offset)
 239{
 240        struct blk_major_name *dp;
 241
 242        if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 243                mutex_lock(&block_class_lock);
 244                for (dp = major_names[offset]; dp; dp = dp->next)
 245                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 246                mutex_unlock(&block_class_lock);
 247        }
 248}
 249#endif /* CONFIG_PROC_FS */
 250
 251int register_blkdev(unsigned int major, const char *name)
 252{
 253        struct blk_major_name **n, *p;
 254        int index, ret = 0;
 255
 256        mutex_lock(&block_class_lock);
 257
 258        /* temporary */
 259        if (major == 0) {
 260                for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
 261                        if (major_names[index] == NULL)
 262                                break;
 263                }
 264
 265                if (index == 0) {
 266                        printk("register_blkdev: failed to get major for %s\n",
 267                               name);
 268                        ret = -EBUSY;
 269                        goto out;
 270                }
 271                major = index;
 272                ret = major;
 273        }
 274
 275        p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
 276        if (p == NULL) {
 277                ret = -ENOMEM;
 278                goto out;
 279        }
 280
 281        p->major = major;
 282        strlcpy(p->name, name, sizeof(p->name));
 283        p->next = NULL;
 284        index = major_to_index(major);
 285
 286        for (n = &major_names[index]; *n; n = &(*n)->next) {
 287                if ((*n)->major == major)
 288                        break;
 289        }
 290        if (!*n)
 291                *n = p;
 292        else
 293                ret = -EBUSY;
 294
 295        if (ret < 0) {
 296                printk("register_blkdev: cannot get major %d for %s\n",
 297                       major, name);
 298                kfree(p);
 299        }
 300out:
 301        mutex_unlock(&block_class_lock);
 302        return ret;
 303}
 304
 305EXPORT_SYMBOL(register_blkdev);
 306
 307void unregister_blkdev(unsigned int major, const char *name)
 308{
 309        struct blk_major_name **n;
 310        struct blk_major_name *p = NULL;
 311        int index = major_to_index(major);
 312
 313        mutex_lock(&block_class_lock);
 314        for (n = &major_names[index]; *n; n = &(*n)->next)
 315                if ((*n)->major == major)
 316                        break;
 317        if (!*n || strcmp((*n)->name, name)) {
 318                WARN_ON(1);
 319        } else {
 320                p = *n;
 321                *n = p->next;
 322        }
 323        mutex_unlock(&block_class_lock);
 324        kfree(p);
 325}
 326
 327EXPORT_SYMBOL(unregister_blkdev);
 328
/*
 * Device-number map for block devices: created by kobj_map_init() in
 * genhd_device_init() and used by blk_register_region(),
 * blk_unregister_region() and get_gendisk().
 */
 329static struct kobj_map *bdev_map;
 330
 331/**
 332 * blk_mangle_minor - scatter minor numbers apart
 333 * @minor: minor number to mangle
 334 *
 335 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 336 * is enabled.  Mangling twice gives the original value.
 337 *
 338 * RETURNS:
 339 * Mangled value.
 340 *
 341 * CONTEXT:
 342 * Don't care.
 343 */
 344static int blk_mangle_minor(int minor)
 345{
 346#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
 347        int i;
 348
 349        for (i = 0; i < MINORBITS / 2; i++) {
 350                int low = minor & (1 << i);
 351                int high = minor & (1 << (MINORBITS - 1 - i));
 352                int distance = MINORBITS - 1 - 2 * i;
 353
 354                minor ^= low | high;    /* clear both bits */
 355                low <<= distance;       /* swap the positions */
 356                high >>= distance;
 357                minor |= low | high;    /* and set */
 358        }
 359#endif
 360        return minor;
 361}
 362
 363/**
 364 * blk_alloc_devt - allocate a dev_t for a partition
 365 * @part: partition to allocate dev_t for
 366 * @devt: out parameter for resulting dev_t
 367 *
 368 * Allocate a dev_t for block device.
 369 *
 370 * RETURNS:
 371 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 372 * failure.
 373 *
 374 * CONTEXT:
 375 * Might sleep.
 376 */
 377int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 378{
 379        struct gendisk *disk = part_to_disk(part);
 380        int idx, rc;
 381
 382        /* in consecutive minor range? */
 383        if (part->partno < disk->minors) {
 384                *devt = MKDEV(disk->major, disk->first_minor + part->partno);
 385                return 0;
 386        }
 387
 388        /* allocate ext devt */
 389        do {
 390                if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
 391                        return -ENOMEM;
 392                rc = idr_get_new(&ext_devt_idr, part, &idx);
 393        } while (rc == -EAGAIN);
 394
 395        if (rc)
 396                return rc;
 397
 398        if (idx > MAX_EXT_DEVT) {
 399                idr_remove(&ext_devt_idr, idx);
 400                return -EBUSY;
 401        }
 402
 403        *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
 404        return 0;
 405}
 406
 407/**
 408 * blk_free_devt - free a dev_t
 409 * @devt: dev_t to free
 410 *
 411 * Free @devt which was allocated using blk_alloc_devt().
 412 *
 413 * CONTEXT:
 414 * Might sleep.
 415 */
 416void blk_free_devt(dev_t devt)
 417{
 418        might_sleep();
 419
 420        if (devt == MKDEV(0, 0))
 421                return;
 422
 423        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
 424                mutex_lock(&ext_devt_mutex);
 425                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 426                mutex_unlock(&ext_devt_mutex);
 427        }
 428}
 429
 430static char *bdevt_str(dev_t devt, char *buf)
 431{
 432        if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
 433                char tbuf[BDEVT_SIZE];
 434                snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
 435                snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
 436        } else
 437                snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
 438
 439        return buf;
 440}
 441
 442/*
 443 * Register device numbers dev..(dev+range-1)
 444 * range must be nonzero
 445 * The hash chain is sorted on range, so that subranges can override.
 446 */
 447void blk_register_region(dev_t devt, unsigned long range, struct module *module,
 448                         struct kobject *(*probe)(dev_t, int *, void *),
 449                         int (*lock)(dev_t, void *), void *data)
 450{
 451        kobj_map(bdev_map, devt, range, module, probe, lock, data);
 452}
 453
 454EXPORT_SYMBOL(blk_register_region);
 455
 456void blk_unregister_region(dev_t devt, unsigned long range)
 457{
 458        kobj_unmap(bdev_map, devt, range);
 459}
 460
 461EXPORT_SYMBOL(blk_unregister_region);
 462
 463static struct kobject *exact_match(dev_t devt, int *partno, void *data)
 464{
 465        struct gendisk *p = data;
 466
 467        return &disk_to_dev(p)->kobj;
 468}
 469
 470static int exact_lock(dev_t devt, void *data)
 471{
 472        struct gendisk *p = data;
 473
 474        if (!get_disk(p))
 475                return -1;
 476        return 0;
 477}
 478
 479/**
 480 * add_disk - add partitioning information to kernel list
 481 * @disk: per-device partitioning information
 482 *
 483 * This function registers the partitioning information in @disk
 484 * with the kernel.
 485 *
 486 * FIXME: error handling
 487 */
 488void add_disk(struct gendisk *disk)
 489{
 490        struct backing_dev_info *bdi;
 491        dev_t devt;
 492        int retval;
 493
 494        /* minors == 0 indicates to use ext devt from part0 and should
 495         * be accompanied with EXT_DEVT flag.  Make sure all
 496         * parameters make sense.
 497         */
 498        WARN_ON(disk->minors && !(disk->major || disk->first_minor));
 499        WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
 500
 501        disk->flags |= GENHD_FL_UP;
 502
 503        retval = blk_alloc_devt(&disk->part0, &devt);
 504        if (retval) {
 505                WARN_ON(1);
 506                return;
 507        }
 508        disk_to_dev(disk)->devt = devt;
 509
 510        /* ->major and ->first_minor aren't supposed to be
 511         * dereferenced from here on, but set them just in case.
 512         */
 513        disk->major = MAJOR(devt);
 514        disk->first_minor = MINOR(devt);
 515
 516        blk_register_region(disk_devt(disk), disk->minors, NULL,
 517                            exact_match, exact_lock, disk);
 518        register_disk(disk);
 519        blk_register_queue(disk);
 520
 521        bdi = &disk->queue->backing_dev_info;
 522        bdi_register_dev(bdi, disk_devt(disk));
 523        retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 524                                   "bdi");
 525        WARN_ON(retval);
 526}
 527
 528EXPORT_SYMBOL(add_disk);
 529EXPORT_SYMBOL(del_gendisk);     /* in partitions/check.c */
 530
 531void unlink_gendisk(struct gendisk *disk)
 532{
 533        sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 534        bdi_unregister(&disk->queue->backing_dev_info);
 535        blk_unregister_queue(disk);
 536        blk_unregister_region(disk_devt(disk), disk->minors);
 537}
 538
 539/**
 540 * get_gendisk - get partitioning information for a given device
 541 * @devt: device to get partitioning information for
 542 * @partno: returned partition index
 543 *
 544 * This function gets the structure containing partitioning
 545 * information for the given device @devt.
 546 */
 547struct gendisk *get_gendisk(dev_t devt, int *partno)
 548{
 549        struct gendisk *disk = NULL;
 550
 551        if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
 552                struct kobject *kobj;
 553
 554                kobj = kobj_lookup(bdev_map, devt, partno);
 555                if (kobj)
 556                        disk = dev_to_disk(kobj_to_dev(kobj));
 557        } else {
 558                struct hd_struct *part;
 559
 560                mutex_lock(&ext_devt_mutex);
 561                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 562                if (part && get_disk(part_to_disk(part))) {
 563                        *partno = part->partno;
 564                        disk = part_to_disk(part);
 565                }
 566                mutex_unlock(&ext_devt_mutex);
 567        }
 568
 569        return disk;
 570}
 571
 572/**
 573 * bdget_disk - do bdget() by gendisk and partition number
 574 * @disk: gendisk of interest
 575 * @partno: partition number
 576 *
 577 * Find partition @partno from @disk, do bdget() on it.
 578 *
 579 * CONTEXT:
 580 * Don't care.
 581 *
 582 * RETURNS:
 583 * Resulting block_device on success, NULL on failure.
 584 */
 585struct block_device *bdget_disk(struct gendisk *disk, int partno)
 586{
 587        struct hd_struct *part;
 588        struct block_device *bdev = NULL;
 589
 590        part = disk_get_part(disk, partno);
 591        if (part)
 592                bdev = bdget(part_devt(part));
 593        disk_put_part(part);
 594
 595        return bdev;
 596}
 597EXPORT_SYMBOL(bdget_disk);
 598
 599/*
 600 * print a full list of all partitions - intended for places where the root
 601 * filesystem can't be mounted and thus to give the victim some idea of what
 602 * went wrong
 603 */
 604void __init printk_all_partitions(void)
 605{
 606        struct class_dev_iter iter;
 607        struct device *dev;
 608
 609        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 610        while ((dev = class_dev_iter_next(&iter))) {
 611                struct gendisk *disk = dev_to_disk(dev);
 612                struct disk_part_iter piter;
 613                struct hd_struct *part;
 614                char name_buf[BDEVNAME_SIZE];
 615                char devt_buf[BDEVT_SIZE];
 616
 617                /*
 618                 * Don't show empty devices or things that have been
 619                 * surpressed
 620                 */
 621                if (get_capacity(disk) == 0 ||
 622                    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
 623                        continue;
 624
 625                /*
 626                 * Note, unlike /proc/partitions, I am showing the
 627                 * numbers in hex - the same format as the root=
 628                 * option takes.
 629                 */
 630                disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
 631                while ((part = disk_part_iter_next(&piter))) {
 632                        bool is_part0 = part == &disk->part0;
 633
 634                        printk("%s%s %10llu %s", is_part0 ? "" : "  ",
 635                               bdevt_str(part_devt(part), devt_buf),
 636                               (unsigned long long)part->nr_sects >> 1,
 637                               disk_name(disk, part->partno, name_buf));
 638                        if (is_part0) {
 639                                if (disk->driverfs_dev != NULL &&
 640                                    disk->driverfs_dev->driver != NULL)
 641                                        printk(" driver: %s\n",
 642                                              disk->driverfs_dev->driver->name);
 643                                else
 644                                        printk(" (driver?)\n");
 645                        } else
 646                                printk("\n");
 647                }
 648                disk_part_iter_exit(&piter);
 649        }
 650        class_dev_iter_exit(&iter);
 651}
 652
 653#ifdef CONFIG_PROC_FS
 654/* iterator */
 655static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 656{
 657        loff_t skip = *pos;
 658        struct class_dev_iter *iter;
 659        struct device *dev;
 660
 661        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 662        if (!iter)
 663                return ERR_PTR(-ENOMEM);
 664
 665        seqf->private = iter;
 666        class_dev_iter_init(iter, &block_class, NULL, &disk_type);
 667        do {
 668                dev = class_dev_iter_next(iter);
 669                if (!dev)
 670                        return NULL;
 671        } while (skip--);
 672
 673        return dev_to_disk(dev);
 674}
 675
 676static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 677{
 678        struct device *dev;
 679
 680        (*pos)++;
 681        dev = class_dev_iter_next(seqf->private);
 682        if (dev)
 683                return dev_to_disk(dev);
 684
 685        return NULL;
 686}
 687
 688static void disk_seqf_stop(struct seq_file *seqf, void *v)
 689{
 690        struct class_dev_iter *iter = seqf->private;
 691
 692        /* stop is called even after start failed :-( */
 693        if (iter) {
 694                class_dev_iter_exit(iter);
 695                kfree(iter);
 696        }
 697}
 698
 699static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 700{
 701        static void *p;
 702
 703        p = disk_seqf_start(seqf, pos);
 704        if (!IS_ERR(p) && p && !*pos)
 705                seq_puts(seqf, "major minor  #blocks  name\n\n");
 706        return p;
 707}
 708
 709static int show_partition(struct seq_file *seqf, void *v)
 710{
 711        struct gendisk *sgp = v;
 712        struct disk_part_iter piter;
 713        struct hd_struct *part;
 714        char buf[BDEVNAME_SIZE];
 715
 716        /* Don't show non-partitionable removeable devices or empty devices */
 717        if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
 718                                   (sgp->flags & GENHD_FL_REMOVABLE)))
 719                return 0;
 720        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 721                return 0;
 722
 723        /* show the full disk and all non-0 size partitions of it */
 724        disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
 725        while ((part = disk_part_iter_next(&piter)))
 726                seq_printf(seqf, "%4d  %7d %10llu %s\n",
 727                           MAJOR(part_devt(part)), MINOR(part_devt(part)),
 728                           (unsigned long long)part->nr_sects >> 1,
 729                           disk_name(sgp, part->partno, buf));
 730        disk_part_iter_exit(&piter);
 731
 732        return 0;
 733}
 734
/*
 * seq_file operations for the partitions listing: the shared disk
 * iterator, with show_partition_start() adding the header line.
 */
 735static const struct seq_operations partitions_op = {
 736        .start  = show_partition_start,
 737        .next   = disk_seqf_next,
 738        .stop   = disk_seqf_stop,
 739        .show   = show_partition
 740};
 741
 742static int partitions_open(struct inode *inode, struct file *file)
 743{
 744        return seq_open(file, &partitions_op);
 745}
 746
/*
 * File operations for the partitions listing: opened through
 * partitions_open(), everything else uses the seq_file helpers.
 */
 747static const struct file_operations proc_partitions_operations = {
 748        .open           = partitions_open,
 749        .read           = seq_read,
 750        .llseek         = seq_lseek,
 751        .release        = seq_release,
 752};
 753#endif
 754
 755
 756static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 757{
 758        if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 759                /* Make old-style 2.4 aliases work */
 760                request_module("block-major-%d", MAJOR(devt));
 761        return NULL;
 762}
 763
 764static int __init genhd_device_init(void)
 765{
 766        int error;
 767
 768        block_class.dev_kobj = sysfs_dev_block_kobj;
 769        error = class_register(&block_class);
 770        if (unlikely(error))
 771                return error;
 772        bdev_map = kobj_map_init(base_probe, &block_class_lock);
 773        blk_dev_init();
 774
 775        register_blkdev(BLOCK_EXT_MAJOR, "blkext");
 776
 777#ifndef CONFIG_SYSFS_DEPRECATED
 778        /* create top-level block dir */
 779        block_depr = kobject_create_and_add("block", NULL);
 780#endif
 781        return 0;
 782}
 783
 784subsys_initcall(genhd_device_init);
 785
 786static ssize_t disk_range_show(struct device *dev,
 787                               struct device_attribute *attr, char *buf)
 788{
 789        struct gendisk *disk = dev_to_disk(dev);
 790
 791        return sprintf(buf, "%d\n", disk->minors);
 792}
 793
 794static ssize_t disk_ext_range_show(struct device *dev,
 795                                   struct device_attribute *attr, char *buf)
 796{
 797        struct gendisk *disk = dev_to_disk(dev);
 798
 799        return sprintf(buf, "%d\n", disk_max_parts(disk));
 800}
 801
 802static ssize_t disk_removable_show(struct device *dev,
 803                                   struct device_attribute *attr, char *buf)
 804{
 805        struct gendisk *disk = dev_to_disk(dev);
 806
 807        return sprintf(buf, "%d\n",
 808                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 809}
 810
 811static ssize_t disk_ro_show(struct device *dev,
 812                                   struct device_attribute *attr, char *buf)
 813{
 814        struct gendisk *disk = dev_to_disk(dev);
 815
 816        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 817}
 818
 819static ssize_t disk_capability_show(struct device *dev,
 820                                    struct device_attribute *attr, char *buf)
 821{
 822        struct gendisk *disk = dev_to_disk(dev);
 823
 824        return sprintf(buf, "%x\n", disk->flags);
 825}
 826
/*
 * Attribute definitions for whole disks.  All are declared S_IRUGO
 * with no store handler, except the two fault-injection attributes
 * below, which are also S_IWUSR and have store handlers.
 */
 827static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 828static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 829static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 830static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 831static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 832static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 833static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 834#ifdef CONFIG_FAIL_MAKE_REQUEST
/* built with __ATTR() directly; the attribute name contains '-' */
 835static struct device_attribute dev_attr_fail =
 836        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
 837#endif
 838#ifdef CONFIG_FAIL_IO_TIMEOUT
 839static struct device_attribute dev_attr_fail_timeout =
 840        __ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
 841                part_timeout_store);
 842#endif
 843
/*
 * NULL-terminated list of the disk attributes defined above; the
 * fault-injection entries are present only when their config option
 * is enabled.
 */
 844static struct attribute *disk_attrs[] = {
 845        &dev_attr_range.attr,
 846        &dev_attr_ext_range.attr,
 847        &dev_attr_removable.attr,
 848        &dev_attr_ro.attr,
 849        &dev_attr_size.attr,
 850        &dev_attr_capability.attr,
 851        &dev_attr_stat.attr,
 852#ifdef CONFIG_FAIL_MAKE_REQUEST
 853        &dev_attr_fail.attr,
 854#endif
 855#ifdef CONFIG_FAIL_IO_TIMEOUT
 856        &dev_attr_fail_timeout.attr,
 857#endif
 858        NULL
 859};
 860
/* Unnamed attribute group wrapping disk_attrs. */
 861static struct attribute_group disk_attr_group = {
 862        .attrs = disk_attrs,
 863};
 864
/* NULL-terminated group list referenced by disk_type.groups. */
 865static struct attribute_group *disk_attr_groups[] = {
 866        &disk_attr_group,
 867        NULL
 868};
 869
 870static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
 871{
 872        struct disk_part_tbl *ptbl =
 873                container_of(head, struct disk_part_tbl, rcu_head);
 874
 875        kfree(ptbl);
 876}
 877
 878/**
 879 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
 880 * @disk: disk to replace part_tbl for
 881 * @new_ptbl: new part_tbl to install
 882 *
 883 * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
 884 * original ptbl is freed using RCU callback.
 885 *
 886 * LOCKING:
 887 * Matching bd_mutx locked.
 888 */
 889static void disk_replace_part_tbl(struct gendisk *disk,
 890                                  struct disk_part_tbl *new_ptbl)
 891{
 892        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 893
 894        rcu_assign_pointer(disk->part_tbl, new_ptbl);
 895        if (old_ptbl)
 896                call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
 897}
 898
 899/**
 900 * disk_expand_part_tbl - expand disk->part_tbl
 901 * @disk: disk to expand part_tbl for
 902 * @partno: expand such that this partno can fit in
 903 *
 904 * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
 905 * uses RCU to allow unlocked dereferencing for stats and other stuff.
 906 *
 907 * LOCKING:
 908 * Matching bd_mutex locked, might sleep.
 909 *
 910 * RETURNS:
 911 * 0 on success, -errno on failure.
 912 */
 913int disk_expand_part_tbl(struct gendisk *disk, int partno)
 914{
 915        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 916        struct disk_part_tbl *new_ptbl;
 917        int len = old_ptbl ? old_ptbl->len : 0;
 918        int target = partno + 1;
 919        size_t size;
 920        int i;
 921
 922        /* disk_max_parts() is zero during initialization, ignore if so */
 923        if (disk_max_parts(disk) && target > disk_max_parts(disk))
 924                return -EINVAL;
 925
 926        if (target <= len)
 927                return 0;
 928
 929        size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
 930        new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
 931        if (!new_ptbl)
 932                return -ENOMEM;
 933
 934        INIT_RCU_HEAD(&new_ptbl->rcu_head);
 935        new_ptbl->len = target;
 936
 937        for (i = 0; i < len; i++)
 938                rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
 939
 940        disk_replace_part_tbl(disk, new_ptbl);
 941        return 0;
 942}
 943
 944static void disk_release(struct device *dev)
 945{
 946        struct gendisk *disk = dev_to_disk(dev);
 947
 948        kfree(disk->random);
 949        disk_replace_part_tbl(disk, NULL);
 950        free_part_stats(&disk->part0);
 951        kfree(disk);
 952}
/* Device class "block"; registered by genhd_device_init(). */
 953struct class block_class = {
 954        .name           = "block",
 955};
 956
/*
 * Device type "disk": carries the disk attribute groups and
 * disk_release() as its release callback.  Passed as the type
 * argument to class_dev_iter_init() throughout this file.
 */
 957static struct device_type disk_type = {
 958        .name           = "disk",
 959        .groups         = disk_attr_groups,
 960        .release        = disk_release,
 961};
 962
 963#ifdef CONFIG_PROC_FS
 964/*
 965 * aggregate disk stat collector.  Uses the same stats that the sysfs
 966 * entries do, above, but makes them available through one seq_file.
 967 *
 968 * The output looks suspiciously like /proc/partitions with a bunch of
 969 * extra fields.
 970 */
 971static int diskstats_show(struct seq_file *seqf, void *v)
 972{
 973        struct gendisk *gp = v;
 974        struct disk_part_iter piter;
 975        struct hd_struct *hd;
 976        char buf[BDEVNAME_SIZE];
 977        int cpu;
 978
 979        /*
 980        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
 981                seq_puts(seqf,  "major minor name"
 982                                "     rio rmerge rsect ruse wio wmerge "
 983                                "wsect wuse running use aveq"
 984                                "\n\n");
 985        */
 986 
 987        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
 988        while ((hd = disk_part_iter_next(&piter))) {
 989                cpu = part_stat_lock();
 990                part_round_stats(cpu, hd);
 991                part_stat_unlock();
 992                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
 993                           "%u %lu %lu %llu %u %u %u %u\n",
 994                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 995                           disk_name(gp, hd->partno, buf),
 996                           part_stat_read(hd, ios[0]),
 997                           part_stat_read(hd, merges[0]),
 998                           (unsigned long long)part_stat_read(hd, sectors[0]),
 999                           jiffies_to_msecs(part_stat_read(hd, ticks[0])),
1000                           part_stat_read(hd, ios[1]),
1001                           part_stat_read(hd, merges[1]),
1002                           (unsigned long long)part_stat_read(hd, sectors[1]),
1003                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1004                           hd->in_flight,
1005                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1006                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1007                        );
1008        }
1009        disk_part_iter_exit(&piter);
1010 
1011        return 0;
1012}
1013
1014static const struct seq_operations diskstats_op = {
1015        .start  = disk_seqf_start,
1016        .next   = disk_seqf_next,
1017        .stop   = disk_seqf_stop,
1018        .show   = diskstats_show
1019};
1020
1021static int diskstats_open(struct inode *inode, struct file *file)
1022{
1023        return seq_open(file, &diskstats_op);
1024}
1025
1026static const struct file_operations proc_diskstats_operations = {
1027        .open           = diskstats_open,
1028        .read           = seq_read,
1029        .llseek         = seq_lseek,
1030        .release        = seq_release,
1031};
1032
1033static int __init proc_genhd_init(void)
1034{
1035        proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
1036        proc_create("partitions", 0, NULL, &proc_partitions_operations);
1037        return 0;
1038}
1039module_init(proc_genhd_init);
1040#endif /* CONFIG_PROC_FS */
1041
1042static void media_change_notify_thread(struct work_struct *work)
1043{
1044        struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1045        char event[] = "MEDIA_CHANGE=1";
1046        char *envp[] = { event, NULL };
1047
1048        /*
1049         * set enviroment vars to indicate which event this is for
1050         * so that user space will know to go check the media status.
1051         */
1052        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1053        put_device(gd->driverfs_dev);
1054}
1055
#if 0
/*
 * Compiled out.  Takes a reference on the disk's driverfs_dev and
 * schedules the async_notify work item; media_change_notify_thread()
 * drops a driverfs_dev reference when it runs.
 */
void genhd_media_change_notify(struct gendisk *disk)
{
	get_device(disk->driverfs_dev);
	schedule_work(&disk->async_notify);
}
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif  /*  0  */
1064
1065dev_t blk_lookup_devt(const char *name, int partno)
1066{
1067        dev_t devt = MKDEV(0, 0);
1068        struct class_dev_iter iter;
1069        struct device *dev;
1070
1071        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1072        while ((dev = class_dev_iter_next(&iter))) {
1073                struct gendisk *disk = dev_to_disk(dev);
1074                struct hd_struct *part;
1075
1076                if (strcmp(dev->bus_id, name))
1077                        continue;
1078
1079                part = disk_get_part(disk, partno);
1080                if (part) {
1081                        devt = part_devt(part);
1082                        disk_put_part(part);
1083                        break;
1084                }
1085                disk_put_part(part);
1086        }
1087        class_dev_iter_exit(&iter);
1088        return devt;
1089}
1090EXPORT_SYMBOL(blk_lookup_devt);
1091
/*
 * alloc_disk - allocate a gendisk
 * @minors: value stored in the new disk's ->minors
 *
 * Convenience wrapper around alloc_disk_node() that passes -1 as the
 * node id.  Returns the new gendisk, or NULL on failure.
 */
struct gendisk *alloc_disk(int minors)
{
	struct gendisk *disk = alloc_disk_node(minors, -1);

	return disk;
}
EXPORT_SYMBOL(alloc_disk);
1097
1098struct gendisk *alloc_disk_node(int minors, int node_id)
1099{
1100        struct gendisk *disk;
1101
1102        disk = kmalloc_node(sizeof(struct gendisk),
1103                                GFP_KERNEL | __GFP_ZERO, node_id);
1104        if (disk) {
1105                if (!init_part_stats(&disk->part0)) {
1106                        kfree(disk);
1107                        return NULL;
1108                }
1109                disk->node_id = node_id;
1110                if (disk_expand_part_tbl(disk, 0)) {
1111                        free_part_stats(&disk->part0);
1112                        kfree(disk);
1113                        return NULL;
1114                }
1115                disk->part_tbl->part[0] = &disk->part0;
1116
1117                disk->minors = minors;
1118                rand_initialize_disk(disk);
1119                disk_to_dev(disk)->class = &block_class;
1120                disk_to_dev(disk)->type = &disk_type;
1121                device_initialize(disk_to_dev(disk));
1122                INIT_WORK(&disk->async_notify,
1123                        media_change_notify_thread);
1124        }
1125        return disk;
1126}
1127EXPORT_SYMBOL(alloc_disk_node);
1128
1129struct kobject *get_disk(struct gendisk *disk)
1130{
1131        struct module *owner;
1132        struct kobject *kobj;
1133
1134        if (!disk->fops)
1135                return NULL;
1136        owner = disk->fops->owner;
1137        if (owner && !try_module_get(owner))
1138                return NULL;
1139        kobj = kobject_get(&disk_to_dev(disk)->kobj);
1140        if (kobj == NULL) {
1141                module_put(owner);
1142                return NULL;
1143        }
1144        return kobj;
1145
1146}
1147
1148EXPORT_SYMBOL(get_disk);
1149
1150void put_disk(struct gendisk *disk)
1151{
1152        if (disk)
1153                kobject_put(&disk_to_dev(disk)->kobj);
1154}
1155
1156EXPORT_SYMBOL(put_disk);
1157
1158void set_device_ro(struct block_device *bdev, int flag)
1159{
1160        bdev->bd_part->policy = flag;
1161}
1162
1163EXPORT_SYMBOL(set_device_ro);
1164
1165void set_disk_ro(struct gendisk *disk, int flag)
1166{
1167        struct disk_part_iter piter;
1168        struct hd_struct *part;
1169
1170        disk_part_iter_init(&piter, disk,
1171                            DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
1172        while ((part = disk_part_iter_next(&piter)))
1173                part->policy = flag;
1174        disk_part_iter_exit(&piter);
1175}
1176
1177EXPORT_SYMBOL(set_disk_ro);
1178
1179int bdev_read_only(struct block_device *bdev)
1180{
1181        if (!bdev)
1182                return 0;
1183        return bdev->bd_part->policy;
1184}
1185
1186EXPORT_SYMBOL(bdev_read_only);
1187
/*
 * invalidate_partition - sync and invalidate one partition of a disk
 * @disk: disk that holds the partition
 * @partno: partition number
 *
 * Obtains the block device with bdget_disk(), calls fsync_bdev() and
 * __invalidate_device() on it, and releases it with bdput().
 *
 * Returns the result of __invalidate_device(), or 0 when bdget_disk()
 * returns no block device.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
	struct block_device *bdev = bdget_disk(disk, partno);
	int ret;

	if (!bdev)
		return 0;

	fsync_bdev(bdev);
	ret = __invalidate_device(bdev);
	bdput(bdev);

	return ret;
}

EXPORT_SYMBOL(invalidate_partition);
1201