linux/block/genhd.c
<<
>>
Prefs
   1/*
   2 *  gendisk handling
   3 */
   4
   5#include <linux/module.h>
   6#include <linux/fs.h>
   7#include <linux/genhd.h>
   8#include <linux/kdev_t.h>
   9#include <linux/kernel.h>
  10#include <linux/blkdev.h>
  11#include <linux/init.h>
  12#include <linux/spinlock.h>
  13#include <linux/proc_fs.h>
  14#include <linux/seq_file.h>
  15#include <linux/slab.h>
  16#include <linux/kmod.h>
  17#include <linux/kobj_map.h>
  18#include <linux/buffer_head.h>
  19#include <linux/mutex.h>
  20#include <linux/idr.h>
  21
  22#include "blk.h"
  23
   /* Held around every walk or update of major_names[]; also handed to
    * kobj_map_init() when bdev_map is created.
    */
   24static DEFINE_MUTEX(block_class_lock);
   25#ifndef CONFIG_SYSFS_DEPRECATED
   /* Set in genhd_device_init() to the kobject created for "block". */
   26struct kobject *block_depr;
   27#endif
   28
   29/* for extended dynamic devt allocation, currently only one major is used */
   30#define MAX_EXT_DEVT            (1 << MINORBITS)
   31
   32/* For extended devt allocation.  ext_devt_mutex prevents look up
   33 * results from going away underneath its user.  ext_devt_idr maps an
    * extended minor index to the struct hd_struct that owns it.
   34 */
   35static DEFINE_MUTEX(ext_devt_mutex);
   36static DEFINE_IDR(ext_devt_idr);
   37
   /* Forward declaration; the initialized definition appears later in this file. */
   38static struct device_type disk_type;
  39
  40/**
  41 * disk_get_part - get partition
  42 * @disk: disk to look partition from
  43 * @partno: partition number
  44 *
  45 * Look for partition @partno from @disk.  If found, increment
  46 * reference count and return it.
  47 *
  48 * CONTEXT:
  49 * Don't care.
  50 *
  51 * RETURNS:
  52 * Pointer to the found partition on success, NULL if not found.
  53 */
  54struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
  55{
  56        struct hd_struct *part = NULL;
  57        struct disk_part_tbl *ptbl;
  58
  59        if (unlikely(partno < 0))
  60                return NULL;
  61
  62        rcu_read_lock();
  63
  64        ptbl = rcu_dereference(disk->part_tbl);
  65        if (likely(partno < ptbl->len)) {
  66                part = rcu_dereference(ptbl->part[partno]);
  67                if (part)
  68                        get_device(part_to_dev(part));
  69        }
  70
  71        rcu_read_unlock();
  72
  73        return part;
  74}
  75EXPORT_SYMBOL_GPL(disk_get_part);
  76
  77/**
  78 * disk_part_iter_init - initialize partition iterator
  79 * @piter: iterator to initialize
  80 * @disk: disk to iterate over
  81 * @flags: DISK_PITER_* flags
  82 *
  83 * Initialize @piter so that it iterates over partitions of @disk.
  84 *
  85 * CONTEXT:
  86 * Don't care.
  87 */
  88void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
  89                          unsigned int flags)
  90{
  91        struct disk_part_tbl *ptbl;
  92
  93        rcu_read_lock();
  94        ptbl = rcu_dereference(disk->part_tbl);
  95
  96        piter->disk = disk;
  97        piter->part = NULL;
  98
  99        if (flags & DISK_PITER_REVERSE)
 100                piter->idx = ptbl->len - 1;
 101        else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
 102                piter->idx = 0;
 103        else
 104                piter->idx = 1;
 105
 106        piter->flags = flags;
 107
 108        rcu_read_unlock();
 109}
 110EXPORT_SYMBOL_GPL(disk_part_iter_init);
 111
 112/**
 113 * disk_part_iter_next - proceed iterator to the next partition and return it
 114 * @piter: iterator of interest
 115 *
 116 * Proceed @piter to the next partition and return it.
 117 *
 118 * CONTEXT:
 119 * Don't care.
 120 */
 121struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 122{
 123        struct disk_part_tbl *ptbl;
 124        int inc, end;
 125
 126        /* put the last partition */
 127        disk_put_part(piter->part);
 128        piter->part = NULL;
 129
 130        /* get part_tbl */
 131        rcu_read_lock();
 132        ptbl = rcu_dereference(piter->disk->part_tbl);
 133
 134        /* determine iteration parameters */
 135        if (piter->flags & DISK_PITER_REVERSE) {
 136                inc = -1;
 137                if (piter->flags & (DISK_PITER_INCL_PART0 |
 138                                    DISK_PITER_INCL_EMPTY_PART0))
 139                        end = -1;
 140                else
 141                        end = 0;
 142        } else {
 143                inc = 1;
 144                end = ptbl->len;
 145        }
 146
 147        /* iterate to the next partition */
 148        for (; piter->idx != end; piter->idx += inc) {
 149                struct hd_struct *part;
 150
 151                part = rcu_dereference(ptbl->part[piter->idx]);
 152                if (!part)
 153                        continue;
 154                if (!part->nr_sects &&
 155                    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
 156                    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
 157                      piter->idx == 0))
 158                        continue;
 159
 160                get_device(part_to_dev(part));
 161                piter->part = part;
 162                piter->idx += inc;
 163                break;
 164        }
 165
 166        rcu_read_unlock();
 167
 168        return piter->part;
 169}
 170EXPORT_SYMBOL_GPL(disk_part_iter_next);
 171
 172/**
 173 * disk_part_iter_exit - finish up partition iteration
 174 * @piter: iter of interest
 175 *
 176 * Called when iteration is over.  Cleans up @piter.
 177 *
 178 * CONTEXT:
 179 * Don't care.
 180 */
 181void disk_part_iter_exit(struct disk_part_iter *piter)
 182{
 183        disk_put_part(piter->part);
 184        piter->part = NULL;
 185}
 186EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 187
 188static inline int sector_in_part(struct hd_struct *part, sector_t sector)
 189{
 190        return part->start_sect <= sector &&
 191                sector < part->start_sect + part->nr_sects;
 192}
 193
 194/**
 195 * disk_map_sector_rcu - map sector to partition
 196 * @disk: gendisk of interest
 197 * @sector: sector to map
 198 *
 199 * Find out which partition @sector maps to on @disk.  This is
 200 * primarily used for stats accounting.
 201 *
 202 * CONTEXT:
 203 * RCU read locked.  The returned partition pointer is valid only
 204 * while preemption is disabled.
 205 *
 206 * RETURNS:
 207 * Found partition on success, part0 is returned if no partition matches
 208 */
 209struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 210{
 211        struct disk_part_tbl *ptbl;
 212        struct hd_struct *part;
 213        int i;
 214
 215        ptbl = rcu_dereference(disk->part_tbl);
 216
 217        part = rcu_dereference(ptbl->last_lookup);
 218        if (part && sector_in_part(part, sector))
 219                return part;
 220
 221        for (i = 1; i < ptbl->len; i++) {
 222                part = rcu_dereference(ptbl->part[i]);
 223
 224                if (part && sector_in_part(part, sector)) {
 225                        rcu_assign_pointer(ptbl->last_lookup, part);
 226                        return part;
 227                }
 228        }
 229        return &disk->part0;
 230}
 231EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
 232
  233/*
  234 * Can be deleted altogether. Later.
  235 *
   * Hash table of registered block majors: register_blkdev() adds
   * entries, unregister_blkdev() removes them and blkdev_show() prints
   * them.  All three take block_class_lock around the access.
  236 */
  237static struct blk_major_name {
  238        struct blk_major_name *next;    /* next entry in the same bucket */
  239        int major;                      /* registered major number */
  240        char name[16];                  /* name, copied with strlcpy() on registration */
  241} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 242
 243/* index in the above - for now: assume no multimajor ranges */
 244static inline int major_to_index(int major)
 245{
 246        return major % BLKDEV_MAJOR_HASH_SIZE;
 247}
 248
 249#ifdef CONFIG_PROC_FS
 250void blkdev_show(struct seq_file *seqf, off_t offset)
 251{
 252        struct blk_major_name *dp;
 253
 254        if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 255                mutex_lock(&block_class_lock);
 256                for (dp = major_names[offset]; dp; dp = dp->next)
 257                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 258                mutex_unlock(&block_class_lock);
 259        }
 260}
 261#endif /* CONFIG_PROC_FS */
 262
 263/**
 264 * register_blkdev - register a new block device
 265 *
 266 * @major: the requested major device number [1..255]. If @major=0, try to
 267 *         allocate any unused major number.
 268 * @name: the name of the new block device as a zero terminated string
 269 *
 270 * The @name must be unique within the system.
 271 *
 272 * The return value depends on the @major input parameter.
 273 *  - if a major device number was requested in range [1..255] then the
 274 *    function returns zero on success, or a negative error code
 275 *  - if any unused major number was requested with @major=0 parameter
 276 *    then the return value is the allocated major number in range
 277 *    [1..255] or a negative error code otherwise
 278 */
 279int register_blkdev(unsigned int major, const char *name)
 280{
 281        struct blk_major_name **n, *p;
 282        int index, ret = 0;
 283
 284        mutex_lock(&block_class_lock);
 285
 286        /* temporary */
 287        if (major == 0) {
 288                for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
 289                        if (major_names[index] == NULL)
 290                                break;
 291                }
 292
 293                if (index == 0) {
 294                        printk("register_blkdev: failed to get major for %s\n",
 295                               name);
 296                        ret = -EBUSY;
 297                        goto out;
 298                }
 299                major = index;
 300                ret = major;
 301        }
 302
 303        p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
 304        if (p == NULL) {
 305                ret = -ENOMEM;
 306                goto out;
 307        }
 308
 309        p->major = major;
 310        strlcpy(p->name, name, sizeof(p->name));
 311        p->next = NULL;
 312        index = major_to_index(major);
 313
 314        for (n = &major_names[index]; *n; n = &(*n)->next) {
 315                if ((*n)->major == major)
 316                        break;
 317        }
 318        if (!*n)
 319                *n = p;
 320        else
 321                ret = -EBUSY;
 322
 323        if (ret < 0) {
 324                printk("register_blkdev: cannot get major %d for %s\n",
 325                       major, name);
 326                kfree(p);
 327        }
 328out:
 329        mutex_unlock(&block_class_lock);
 330        return ret;
 331}
 332
 333EXPORT_SYMBOL(register_blkdev);
 334
 335void unregister_blkdev(unsigned int major, const char *name)
 336{
 337        struct blk_major_name **n;
 338        struct blk_major_name *p = NULL;
 339        int index = major_to_index(major);
 340
 341        mutex_lock(&block_class_lock);
 342        for (n = &major_names[index]; *n; n = &(*n)->next)
 343                if ((*n)->major == major)
 344                        break;
 345        if (!*n || strcmp((*n)->name, name)) {
 346                WARN_ON(1);
 347        } else {
 348                p = *n;
 349                *n = p->next;
 350        }
 351        mutex_unlock(&block_class_lock);
 352        kfree(p);
 353}
 354
 355EXPORT_SYMBOL(unregister_blkdev);
 356
  /* dev_t lookup map used by blk_register_region(), blk_unregister_region()
   * and get_gendisk(); created in genhd_device_init().
   */
  357static struct kobj_map *bdev_map;
 358
 359/**
 360 * blk_mangle_minor - scatter minor numbers apart
 361 * @minor: minor number to mangle
 362 *
 363 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 364 * is enabled.  Mangling twice gives the original value.
 365 *
 366 * RETURNS:
 367 * Mangled value.
 368 *
 369 * CONTEXT:
 370 * Don't care.
 371 */
 372static int blk_mangle_minor(int minor)
 373{
 374#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
 375        int i;
 376
 377        for (i = 0; i < MINORBITS / 2; i++) {
 378                int low = minor & (1 << i);
 379                int high = minor & (1 << (MINORBITS - 1 - i));
 380                int distance = MINORBITS - 1 - 2 * i;
 381
 382                minor ^= low | high;    /* clear both bits */
 383                low <<= distance;       /* swap the positions */
 384                high >>= distance;
 385                minor |= low | high;    /* and set */
 386        }
 387#endif
 388        return minor;
 389}
 390
 391/**
 392 * blk_alloc_devt - allocate a dev_t for a partition
 393 * @part: partition to allocate dev_t for
 394 * @devt: out parameter for resulting dev_t
 395 *
 396 * Allocate a dev_t for block device.
 397 *
 398 * RETURNS:
 399 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 400 * failure.
 401 *
 402 * CONTEXT:
 403 * Might sleep.
 404 */
 405int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 406{
 407        struct gendisk *disk = part_to_disk(part);
 408        int idx, rc;
 409
 410        /* in consecutive minor range? */
 411        if (part->partno < disk->minors) {
 412                *devt = MKDEV(disk->major, disk->first_minor + part->partno);
 413                return 0;
 414        }
 415
 416        /* allocate ext devt */
 417        do {
 418                if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
 419                        return -ENOMEM;
 420                rc = idr_get_new(&ext_devt_idr, part, &idx);
 421        } while (rc == -EAGAIN);
 422
 423        if (rc)
 424                return rc;
 425
 426        if (idx > MAX_EXT_DEVT) {
 427                idr_remove(&ext_devt_idr, idx);
 428                return -EBUSY;
 429        }
 430
 431        *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
 432        return 0;
 433}
 434
 435/**
 436 * blk_free_devt - free a dev_t
 437 * @devt: dev_t to free
 438 *
 439 * Free @devt which was allocated using blk_alloc_devt().
 440 *
 441 * CONTEXT:
 442 * Might sleep.
 443 */
 444void blk_free_devt(dev_t devt)
 445{
 446        might_sleep();
 447
 448        if (devt == MKDEV(0, 0))
 449                return;
 450
 451        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
 452                mutex_lock(&ext_devt_mutex);
 453                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 454                mutex_unlock(&ext_devt_mutex);
 455        }
 456}
 457
 458static char *bdevt_str(dev_t devt, char *buf)
 459{
 460        if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
 461                char tbuf[BDEVT_SIZE];
 462                snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
 463                snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
 464        } else
 465                snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
 466
 467        return buf;
 468}
 469
 470/*
 471 * Register device numbers dev..(dev+range-1)
 472 * range must be nonzero
 473 * The hash chain is sorted on range, so that subranges can override.
 474 */
 475void blk_register_region(dev_t devt, unsigned long range, struct module *module,
 476                         struct kobject *(*probe)(dev_t, int *, void *),
 477                         int (*lock)(dev_t, void *), void *data)
 478{
 479        kobj_map(bdev_map, devt, range, module, probe, lock, data);
 480}
 481
 482EXPORT_SYMBOL(blk_register_region);
 483
 484void blk_unregister_region(dev_t devt, unsigned long range)
 485{
 486        kobj_unmap(bdev_map, devt, range);
 487}
 488
 489EXPORT_SYMBOL(blk_unregister_region);
 490
 491static struct kobject *exact_match(dev_t devt, int *partno, void *data)
 492{
 493        struct gendisk *p = data;
 494
 495        return &disk_to_dev(p)->kobj;
 496}
 497
 498static int exact_lock(dev_t devt, void *data)
 499{
 500        struct gendisk *p = data;
 501
 502        if (!get_disk(p))
 503                return -1;
 504        return 0;
 505}
 506
 507/**
 508 * add_disk - add partitioning information to kernel list
 509 * @disk: per-device partitioning information
 510 *
 511 * This function registers the partitioning information in @disk
 512 * with the kernel.
 513 *
 514 * FIXME: error handling
 515 */
 516void add_disk(struct gendisk *disk)
 517{
 518        struct backing_dev_info *bdi;
 519        dev_t devt;
 520        int retval;
 521
 522        /* minors == 0 indicates to use ext devt from part0 and should
 523         * be accompanied with EXT_DEVT flag.  Make sure all
 524         * parameters make sense.
 525         */
 526        WARN_ON(disk->minors && !(disk->major || disk->first_minor));
 527        WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
 528
 529        disk->flags |= GENHD_FL_UP;
 530
 531        retval = blk_alloc_devt(&disk->part0, &devt);
 532        if (retval) {
 533                WARN_ON(1);
 534                return;
 535        }
 536        disk_to_dev(disk)->devt = devt;
 537
 538        /* ->major and ->first_minor aren't supposed to be
 539         * dereferenced from here on, but set them just in case.
 540         */
 541        disk->major = MAJOR(devt);
 542        disk->first_minor = MINOR(devt);
 543
 544        blk_register_region(disk_devt(disk), disk->minors, NULL,
 545                            exact_match, exact_lock, disk);
 546        register_disk(disk);
 547        blk_register_queue(disk);
 548
 549        bdi = &disk->queue->backing_dev_info;
 550        bdi_register_dev(bdi, disk_devt(disk));
 551        retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 552                                   "bdi");
 553        WARN_ON(retval);
 554}
 555
 556EXPORT_SYMBOL(add_disk);
 557EXPORT_SYMBOL(del_gendisk);     /* in partitions/check.c */
 558
 559void unlink_gendisk(struct gendisk *disk)
 560{
 561        sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 562        bdi_unregister(&disk->queue->backing_dev_info);
 563        blk_unregister_queue(disk);
 564        blk_unregister_region(disk_devt(disk), disk->minors);
 565}
 566
 567/**
 568 * get_gendisk - get partitioning information for a given device
 569 * @devt: device to get partitioning information for
 570 * @partno: returned partition index
 571 *
 572 * This function gets the structure containing partitioning
 573 * information for the given device @devt.
 574 */
 575struct gendisk *get_gendisk(dev_t devt, int *partno)
 576{
 577        struct gendisk *disk = NULL;
 578
 579        if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
 580                struct kobject *kobj;
 581
 582                kobj = kobj_lookup(bdev_map, devt, partno);
 583                if (kobj)
 584                        disk = dev_to_disk(kobj_to_dev(kobj));
 585        } else {
 586                struct hd_struct *part;
 587
 588                mutex_lock(&ext_devt_mutex);
 589                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 590                if (part && get_disk(part_to_disk(part))) {
 591                        *partno = part->partno;
 592                        disk = part_to_disk(part);
 593                }
 594                mutex_unlock(&ext_devt_mutex);
 595        }
 596
 597        return disk;
 598}
 599EXPORT_SYMBOL(get_gendisk);
 600
 601/**
 602 * bdget_disk - do bdget() by gendisk and partition number
 603 * @disk: gendisk of interest
 604 * @partno: partition number
 605 *
 606 * Find partition @partno from @disk, do bdget() on it.
 607 *
 608 * CONTEXT:
 609 * Don't care.
 610 *
 611 * RETURNS:
 612 * Resulting block_device on success, NULL on failure.
 613 */
 614struct block_device *bdget_disk(struct gendisk *disk, int partno)
 615{
 616        struct hd_struct *part;
 617        struct block_device *bdev = NULL;
 618
 619        part = disk_get_part(disk, partno);
 620        if (part)
 621                bdev = bdget(part_devt(part));
 622        disk_put_part(part);
 623
 624        return bdev;
 625}
 626EXPORT_SYMBOL(bdget_disk);
 627
 628/*
 629 * print a full list of all partitions - intended for places where the root
 630 * filesystem can't be mounted and thus to give the victim some idea of what
 631 * went wrong
 632 */
 633void __init printk_all_partitions(void)
 634{
 635        struct class_dev_iter iter;
 636        struct device *dev;
 637
 638        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 639        while ((dev = class_dev_iter_next(&iter))) {
 640                struct gendisk *disk = dev_to_disk(dev);
 641                struct disk_part_iter piter;
 642                struct hd_struct *part;
 643                char name_buf[BDEVNAME_SIZE];
 644                char devt_buf[BDEVT_SIZE];
 645
 646                /*
 647                 * Don't show empty devices or things that have been
 648                 * surpressed
 649                 */
 650                if (get_capacity(disk) == 0 ||
 651                    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
 652                        continue;
 653
 654                /*
 655                 * Note, unlike /proc/partitions, I am showing the
 656                 * numbers in hex - the same format as the root=
 657                 * option takes.
 658                 */
 659                disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
 660                while ((part = disk_part_iter_next(&piter))) {
 661                        bool is_part0 = part == &disk->part0;
 662
 663                        printk("%s%s %10llu %s", is_part0 ? "" : "  ",
 664                               bdevt_str(part_devt(part), devt_buf),
 665                               (unsigned long long)part->nr_sects >> 1,
 666                               disk_name(disk, part->partno, name_buf));
 667                        if (is_part0) {
 668                                if (disk->driverfs_dev != NULL &&
 669                                    disk->driverfs_dev->driver != NULL)
 670                                        printk(" driver: %s\n",
 671                                              disk->driverfs_dev->driver->name);
 672                                else
 673                                        printk(" (driver?)\n");
 674                        } else
 675                                printk("\n");
 676                }
 677                disk_part_iter_exit(&piter);
 678        }
 679        class_dev_iter_exit(&iter);
 680}
 681
 682#ifdef CONFIG_PROC_FS
 683/* iterator */
 684static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 685{
 686        loff_t skip = *pos;
 687        struct class_dev_iter *iter;
 688        struct device *dev;
 689
 690        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 691        if (!iter)
 692                return ERR_PTR(-ENOMEM);
 693
 694        seqf->private = iter;
 695        class_dev_iter_init(iter, &block_class, NULL, &disk_type);
 696        do {
 697                dev = class_dev_iter_next(iter);
 698                if (!dev)
 699                        return NULL;
 700        } while (skip--);
 701
 702        return dev_to_disk(dev);
 703}
 704
 705static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 706{
 707        struct device *dev;
 708
 709        (*pos)++;
 710        dev = class_dev_iter_next(seqf->private);
 711        if (dev)
 712                return dev_to_disk(dev);
 713
 714        return NULL;
 715}
 716
 717static void disk_seqf_stop(struct seq_file *seqf, void *v)
 718{
 719        struct class_dev_iter *iter = seqf->private;
 720
 721        /* stop is called even after start failed :-( */
 722        if (iter) {
 723                class_dev_iter_exit(iter);
 724                kfree(iter);
 725        }
 726}
 727
 728static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 729{
 730        static void *p;
 731
 732        p = disk_seqf_start(seqf, pos);
 733        if (!IS_ERR(p) && p && !*pos)
 734                seq_puts(seqf, "major minor  #blocks  name\n\n");
 735        return p;
 736}
 737
 738static int show_partition(struct seq_file *seqf, void *v)
 739{
 740        struct gendisk *sgp = v;
 741        struct disk_part_iter piter;
 742        struct hd_struct *part;
 743        char buf[BDEVNAME_SIZE];
 744
 745        /* Don't show non-partitionable removeable devices or empty devices */
 746        if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
 747                                   (sgp->flags & GENHD_FL_REMOVABLE)))
 748                return 0;
 749        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 750                return 0;
 751
 752        /* show the full disk and all non-0 size partitions of it */
 753        disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
 754        while ((part = disk_part_iter_next(&piter)))
 755                seq_printf(seqf, "%4d  %7d %10llu %s\n",
 756                           MAJOR(part_devt(part)), MINOR(part_devt(part)),
 757                           (unsigned long long)part->nr_sects >> 1,
 758                           disk_name(sgp, part->partno, buf));
 759        disk_part_iter_exit(&piter);
 760
 761        return 0;
 762}
 763
  /* seq_file callbacks for the partitions listing, installed by partitions_open(). */
  764static const struct seq_operations partitions_op = {
  765        .start  = show_partition_start,
  766        .next   = disk_seqf_next,
  767        .stop   = disk_seqf_stop,
  768        .show   = show_partition
  769};
 770
 771static int partitions_open(struct inode *inode, struct file *file)
 772{
 773        return seq_open(file, &partitions_op);
 774}
 775
  /* File operations for the partitions listing: custom open, generic seq_file read/llseek/release. */
  776static const struct file_operations proc_partitions_operations = {
  777        .open           = partitions_open,
  778        .read           = seq_read,
  779        .llseek         = seq_lseek,
  780        .release        = seq_release,
  781};
 782#endif
 783
 784
 785static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 786{
 787        if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 788                /* Make old-style 2.4 aliases work */
 789                request_module("block-major-%d", MAJOR(devt));
 790        return NULL;
 791}
 792
 793static int __init genhd_device_init(void)
 794{
 795        int error;
 796
 797        block_class.dev_kobj = sysfs_dev_block_kobj;
 798        error = class_register(&block_class);
 799        if (unlikely(error))
 800                return error;
 801        bdev_map = kobj_map_init(base_probe, &block_class_lock);
 802        blk_dev_init();
 803
 804        register_blkdev(BLOCK_EXT_MAJOR, "blkext");
 805
 806#ifndef CONFIG_SYSFS_DEPRECATED
 807        /* create top-level block dir */
 808        block_depr = kobject_create_and_add("block", NULL);
 809#endif
 810        return 0;
 811}
 812
 813subsys_initcall(genhd_device_init);
 814
 815static ssize_t disk_range_show(struct device *dev,
 816                               struct device_attribute *attr, char *buf)
 817{
 818        struct gendisk *disk = dev_to_disk(dev);
 819
 820        return sprintf(buf, "%d\n", disk->minors);
 821}
 822
 823static ssize_t disk_ext_range_show(struct device *dev,
 824                                   struct device_attribute *attr, char *buf)
 825{
 826        struct gendisk *disk = dev_to_disk(dev);
 827
 828        return sprintf(buf, "%d\n", disk_max_parts(disk));
 829}
 830
 831static ssize_t disk_removable_show(struct device *dev,
 832                                   struct device_attribute *attr, char *buf)
 833{
 834        struct gendisk *disk = dev_to_disk(dev);
 835
 836        return sprintf(buf, "%d\n",
 837                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 838}
 839
 840static ssize_t disk_ro_show(struct device *dev,
 841                                   struct device_attribute *attr, char *buf)
 842{
 843        struct gendisk *disk = dev_to_disk(dev);
 844
 845        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 846}
 847
 848static ssize_t disk_capability_show(struct device *dev,
 849                                    struct device_attribute *attr, char *buf)
 850{
 851        struct gendisk *disk = dev_to_disk(dev);
 852
 853        return sprintf(buf, "%x\n", disk->flags);
 854}
 855
 856static ssize_t disk_alignment_offset_show(struct device *dev,
 857                                          struct device_attribute *attr,
 858                                          char *buf)
 859{
 860        struct gendisk *disk = dev_to_disk(dev);
 861
 862        return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 863}
 864
 865static ssize_t disk_discard_alignment_show(struct device *dev,
 866                                           struct device_attribute *attr,
 867                                           char *buf)
 868{
 869        struct gendisk *disk = dev_to_disk(dev);
 870
 871        return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 872}
 873
  /*
   * Device attributes of a disk.  Those declared with DEVICE_ATTR() are
   * read-only (S_IRUGO, no store handler).  The two fault-injection
   * attributes are also writable by the owner and exist only when the
   * corresponding CONFIG_FAIL_* option is enabled.
   */
  874static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
  875static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
  876static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
  877static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
  878static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
  879static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
  880static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
  881                   NULL);
  882static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
  883static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
  884static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
  885#ifdef CONFIG_FAIL_MAKE_REQUEST
  /* named "make-it-fail"; not a valid C identifier, hence __ATTR() with an explicit variable */
  886static struct device_attribute dev_attr_fail =
  887        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
  888#endif
  889#ifdef CONFIG_FAIL_IO_TIMEOUT
  /* named "io-timeout-fail" */
  890static struct device_attribute dev_attr_fail_timeout =
  891        __ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
  892                part_timeout_store);
  893#endif
 894
  /* NULL-terminated list of the disk attributes defined above in this file. */
  895static struct attribute *disk_attrs[] = {
  896        &dev_attr_range.attr,
  897        &dev_attr_ext_range.attr,
  898        &dev_attr_removable.attr,
  899        &dev_attr_ro.attr,
  900        &dev_attr_size.attr,
  901        &dev_attr_alignment_offset.attr,
  902        &dev_attr_discard_alignment.attr,
  903        &dev_attr_capability.attr,
  904        &dev_attr_stat.attr,
  905        &dev_attr_inflight.attr,
  906#ifdef CONFIG_FAIL_MAKE_REQUEST
  907        &dev_attr_fail.attr,
  908#endif
  909#ifdef CONFIG_FAIL_IO_TIMEOUT
  910        &dev_attr_fail_timeout.attr,
  911#endif
  912        NULL
  913};
  914
  /* Unnamed attribute group wrapping disk_attrs. */
  915static struct attribute_group disk_attr_group = {
  916        .attrs = disk_attrs,
  917};
  918
  /* NULL-terminated group list referenced by disk_type.groups. */
  919static const struct attribute_group *disk_attr_groups[] = {
  920        &disk_attr_group,
  921        NULL
  922};
 923
 924static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
 925{
 926        struct disk_part_tbl *ptbl =
 927                container_of(head, struct disk_part_tbl, rcu_head);
 928
 929        kfree(ptbl);
 930}
 931
 932/**
 933 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
 934 * @disk: disk to replace part_tbl for
 935 * @new_ptbl: new part_tbl to install
 936 *
 937 * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
 938 * original ptbl is freed using RCU callback.
 939 *
 940 * LOCKING:
 941 * Matching bd_mutx locked.
 942 */
 943static void disk_replace_part_tbl(struct gendisk *disk,
 944                                  struct disk_part_tbl *new_ptbl)
 945{
 946        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 947
 948        rcu_assign_pointer(disk->part_tbl, new_ptbl);
 949
 950        if (old_ptbl) {
 951                rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 952                call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
 953        }
 954}
 955
 956/**
 957 * disk_expand_part_tbl - expand disk->part_tbl
 958 * @disk: disk to expand part_tbl for
 959 * @partno: expand such that this partno can fit in
 960 *
 961 * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
 962 * uses RCU to allow unlocked dereferencing for stats and other stuff.
 963 *
 964 * LOCKING:
 965 * Matching bd_mutex locked, might sleep.
 966 *
 967 * RETURNS:
 968 * 0 on success, -errno on failure.
 969 */
 970int disk_expand_part_tbl(struct gendisk *disk, int partno)
 971{
 972        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 973        struct disk_part_tbl *new_ptbl;
 974        int len = old_ptbl ? old_ptbl->len : 0;
 975        int target = partno + 1;
 976        size_t size;
 977        int i;
 978
 979        /* disk_max_parts() is zero during initialization, ignore if so */
 980        if (disk_max_parts(disk) && target > disk_max_parts(disk))
 981                return -EINVAL;
 982
 983        if (target <= len)
 984                return 0;
 985
 986        size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
 987        new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
 988        if (!new_ptbl)
 989                return -ENOMEM;
 990
 991        new_ptbl->len = target;
 992
 993        for (i = 0; i < len; i++)
 994                rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
 995
 996        disk_replace_part_tbl(disk, new_ptbl);
 997        return 0;
 998}
 999
1000static void disk_release(struct device *dev)
1001{
1002        struct gendisk *disk = dev_to_disk(dev);
1003
1004        kfree(disk->random);
1005        disk_replace_part_tbl(disk, NULL);
1006        free_part_stats(&disk->part0);
1007        kfree(disk);
1008}
1009struct class block_class = {
1010        .name           = "block",
1011};
1012
1013static char *block_devnode(struct device *dev, mode_t *mode)
1014{
1015        struct gendisk *disk = dev_to_disk(dev);
1016
1017        if (disk->devnode)
1018                return disk->devnode(disk, mode);
1019        return NULL;
1020}
1021
1022static struct device_type disk_type = {
1023        .name           = "disk",
1024        .groups         = disk_attr_groups,
1025        .release        = disk_release,
1026        .devnode        = block_devnode,
1027};
1028
1029#ifdef CONFIG_PROC_FS
1030/*
1031 * aggregate disk stat collector.  Uses the same stats that the sysfs
1032 * entries do, above, but makes them available through one seq_file.
1033 *
1034 * The output looks suspiciously like /proc/partitions with a bunch of
1035 * extra fields.
1036 */
1037static int diskstats_show(struct seq_file *seqf, void *v)
1038{
1039        struct gendisk *gp = v;
1040        struct disk_part_iter piter;
1041        struct hd_struct *hd;
1042        char buf[BDEVNAME_SIZE];
1043        int cpu;
1044
1045        /*
1046        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1047                seq_puts(seqf,  "major minor name"
1048                                "     rio rmerge rsect ruse wio wmerge "
1049                                "wsect wuse running use aveq"
1050                                "\n\n");
1051        */
1052 
1053        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1054        while ((hd = disk_part_iter_next(&piter))) {
1055                cpu = part_stat_lock();
1056                part_round_stats(cpu, hd);
1057                part_stat_unlock();
1058                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
1059                           "%u %lu %lu %llu %u %u %u %u\n",
1060                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1061                           disk_name(gp, hd->partno, buf),
1062                           part_stat_read(hd, ios[0]),
1063                           part_stat_read(hd, merges[0]),
1064                           (unsigned long long)part_stat_read(hd, sectors[0]),
1065                           jiffies_to_msecs(part_stat_read(hd, ticks[0])),
1066                           part_stat_read(hd, ios[1]),
1067                           part_stat_read(hd, merges[1]),
1068                           (unsigned long long)part_stat_read(hd, sectors[1]),
1069                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1070                           part_in_flight(hd),
1071                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1072                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1073                        );
1074        }
1075        disk_part_iter_exit(&piter);
1076 
1077        return 0;
1078}
1079
1080static const struct seq_operations diskstats_op = {
1081        .start  = disk_seqf_start,
1082        .next   = disk_seqf_next,
1083        .stop   = disk_seqf_stop,
1084        .show   = diskstats_show
1085};
1086
1087static int diskstats_open(struct inode *inode, struct file *file)
1088{
1089        return seq_open(file, &diskstats_op);
1090}
1091
1092static const struct file_operations proc_diskstats_operations = {
1093        .open           = diskstats_open,
1094        .read           = seq_read,
1095        .llseek         = seq_lseek,
1096        .release        = seq_release,
1097};
1098
1099static int __init proc_genhd_init(void)
1100{
1101        proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
1102        proc_create("partitions", 0, NULL, &proc_partitions_operations);
1103        return 0;
1104}
1105module_init(proc_genhd_init);
1106#endif /* CONFIG_PROC_FS */
1107
1108static void media_change_notify_thread(struct work_struct *work)
1109{
1110        struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1111        char event[] = "MEDIA_CHANGE=1";
1112        char *envp[] = { event, NULL };
1113
1114        /*
1115         * set enviroment vars to indicate which event this is for
1116         * so that user space will know to go check the media status.
1117         */
1118        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1119        put_device(gd->driverfs_dev);
1120}
1121
#if 0
/*
 * Compiled out.  Takes a reference on disk->driverfs_dev and schedules
 * disk->async_notify; media_change_notify_thread(), the handler for that
 * work item, drops a reference on the same device after sending the
 * uevent.
 */
void genhd_media_change_notify(struct gendisk *disk)
{
        get_device(disk->driverfs_dev);
        schedule_work(&disk->async_notify);
}
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif  /*  0  */
1130
1131dev_t blk_lookup_devt(const char *name, int partno)
1132{
1133        dev_t devt = MKDEV(0, 0);
1134        struct class_dev_iter iter;
1135        struct device *dev;
1136
1137        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1138        while ((dev = class_dev_iter_next(&iter))) {
1139                struct gendisk *disk = dev_to_disk(dev);
1140                struct hd_struct *part;
1141
1142                if (strcmp(dev_name(dev), name))
1143                        continue;
1144
1145                if (partno < disk->minors) {
1146                        /* We need to return the right devno, even
1147                         * if the partition doesn't exist yet.
1148                         */
1149                        devt = MKDEV(MAJOR(dev->devt),
1150                                     MINOR(dev->devt) + partno);
1151                        break;
1152                }
1153                part = disk_get_part(disk, partno);
1154                if (part) {
1155                        devt = part_devt(part);
1156                        disk_put_part(part);
1157                        break;
1158                }
1159                disk_put_part(part);
1160        }
1161        class_dev_iter_exit(&iter);
1162        return devt;
1163}
1164EXPORT_SYMBOL(blk_lookup_devt);
1165
/*
 * Allocate a gendisk with @minors minors.  Thin wrapper around
 * alloc_disk_node() that passes -1 as the node id.
 * Returns the new disk, or NULL on failure.
 */
struct gendisk *alloc_disk(int minors)
{
        struct gendisk *disk = alloc_disk_node(minors, -1);

        return disk;
}
EXPORT_SYMBOL(alloc_disk);
1171
1172struct gendisk *alloc_disk_node(int minors, int node_id)
1173{
1174        struct gendisk *disk;
1175
1176        disk = kmalloc_node(sizeof(struct gendisk),
1177                                GFP_KERNEL | __GFP_ZERO, node_id);
1178        if (disk) {
1179                if (!init_part_stats(&disk->part0)) {
1180                        kfree(disk);
1181                        return NULL;
1182                }
1183                disk->node_id = node_id;
1184                if (disk_expand_part_tbl(disk, 0)) {
1185                        free_part_stats(&disk->part0);
1186                        kfree(disk);
1187                        return NULL;
1188                }
1189                disk->part_tbl->part[0] = &disk->part0;
1190
1191                disk->minors = minors;
1192                rand_initialize_disk(disk);
1193                disk_to_dev(disk)->class = &block_class;
1194                disk_to_dev(disk)->type = &disk_type;
1195                device_initialize(disk_to_dev(disk));
1196                INIT_WORK(&disk->async_notify,
1197                        media_change_notify_thread);
1198        }
1199        return disk;
1200}
1201EXPORT_SYMBOL(alloc_disk_node);
1202
1203struct kobject *get_disk(struct gendisk *disk)
1204{
1205        struct module *owner;
1206        struct kobject *kobj;
1207
1208        if (!disk->fops)
1209                return NULL;
1210        owner = disk->fops->owner;
1211        if (owner && !try_module_get(owner))
1212                return NULL;
1213        kobj = kobject_get(&disk_to_dev(disk)->kobj);
1214        if (kobj == NULL) {
1215                module_put(owner);
1216                return NULL;
1217        }
1218        return kobj;
1219
1220}
1221
1222EXPORT_SYMBOL(get_disk);
1223
1224void put_disk(struct gendisk *disk)
1225{
1226        if (disk)
1227                kobject_put(&disk_to_dev(disk)->kobj);
1228}
1229
1230EXPORT_SYMBOL(put_disk);
1231
1232static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1233{
1234        char event[] = "DISK_RO=1";
1235        char *envp[] = { event, NULL };
1236
1237        if (!ro)
1238                event[8] = '0';
1239        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1240}
1241
1242void set_device_ro(struct block_device *bdev, int flag)
1243{
1244        bdev->bd_part->policy = flag;
1245}
1246
1247EXPORT_SYMBOL(set_device_ro);
1248
1249void set_disk_ro(struct gendisk *disk, int flag)
1250{
1251        struct disk_part_iter piter;
1252        struct hd_struct *part;
1253
1254        if (disk->part0.policy != flag) {
1255                set_disk_ro_uevent(disk, flag);
1256                disk->part0.policy = flag;
1257        }
1258
1259        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
1260        while ((part = disk_part_iter_next(&piter)))
1261                part->policy = flag;
1262        disk_part_iter_exit(&piter);
1263}
1264
1265EXPORT_SYMBOL(set_disk_ro);
1266
1267int bdev_read_only(struct block_device *bdev)
1268{
1269        if (!bdev)
1270                return 0;
1271        return bdev->bd_part->policy;
1272}
1273
1274EXPORT_SYMBOL(bdev_read_only);
1275
/*
 * Sync and invalidate partition @partno of @disk.
 *
 * Looks up the block device with bdget_disk(); if there is one, calls
 * fsync_bdev() and __invalidate_device() on it and drops the reference
 * again with bdput().
 *
 * Returns the result of __invalidate_device(), or 0 when bdget_disk()
 * returned no block device.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
        struct block_device *bdev = bdget_disk(disk, partno);
        int res;

        if (!bdev)
                return 0;

        fsync_bdev(bdev);
        res = __invalidate_device(bdev);
        bdput(bdev);

        return res;
}

EXPORT_SYMBOL(invalidate_partition);
1289