linux/block/genhd.c
<<
>>
Prefs
   1/*
   2 *  gendisk handling
   3 */
   4
   5#include <linux/module.h>
   6#include <linux/fs.h>
   7#include <linux/genhd.h>
   8#include <linux/kdev_t.h>
   9#include <linux/kernel.h>
  10#include <linux/blkdev.h>
  11#include <linux/init.h>
  12#include <linux/spinlock.h>
  13#include <linux/proc_fs.h>
  14#include <linux/seq_file.h>
  15#include <linux/slab.h>
  16#include <linux/kmod.h>
  17#include <linux/kobj_map.h>
  18#include <linux/buffer_head.h>
  19#include <linux/mutex.h>
  20#include <linux/idr.h>
  21
  22#include "blk.h"
  23
/*
 * Taken around every access to the major_names hash below and also
 * handed to kobj_map_init() as the lock for bdev_map.
 */
static DEFINE_MUTEX(block_class_lock);
#ifndef CONFIG_SYSFS_DEPRECATED
/* kobject of the top-level "block" directory, created in genhd_device_init() */
struct kobject *block_depr;
#endif

/* for extended dynamic devt allocation, currently only one major is used */
#define MAX_EXT_DEVT		(1 << MINORBITS)

/* For extended devt allocation.  ext_devt_mutex prevents look up
 * results from going away underneath its user.
 */
static DEFINE_MUTEX(ext_devt_mutex);
/* maps an extended minor index to the struct hd_struct that owns it */
static DEFINE_IDR(ext_devt_idr);

/* forward declaration, the initializer lives further down in this file */
static struct device_type disk_type;
  39
  40/**
  41 * disk_get_part - get partition
  42 * @disk: disk to look partition from
  43 * @partno: partition number
  44 *
  45 * Look for partition @partno from @disk.  If found, increment
  46 * reference count and return it.
  47 *
  48 * CONTEXT:
  49 * Don't care.
  50 *
  51 * RETURNS:
  52 * Pointer to the found partition on success, NULL if not found.
  53 */
  54struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
  55{
  56        struct hd_struct *part = NULL;
  57        struct disk_part_tbl *ptbl;
  58
  59        if (unlikely(partno < 0))
  60                return NULL;
  61
  62        rcu_read_lock();
  63
  64        ptbl = rcu_dereference(disk->part_tbl);
  65        if (likely(partno < ptbl->len)) {
  66                part = rcu_dereference(ptbl->part[partno]);
  67                if (part)
  68                        get_device(part_to_dev(part));
  69        }
  70
  71        rcu_read_unlock();
  72
  73        return part;
  74}
  75EXPORT_SYMBOL_GPL(disk_get_part);
  76
  77/**
  78 * disk_part_iter_init - initialize partition iterator
  79 * @piter: iterator to initialize
  80 * @disk: disk to iterate over
  81 * @flags: DISK_PITER_* flags
  82 *
  83 * Initialize @piter so that it iterates over partitions of @disk.
  84 *
  85 * CONTEXT:
  86 * Don't care.
  87 */
  88void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
  89                          unsigned int flags)
  90{
  91        struct disk_part_tbl *ptbl;
  92
  93        rcu_read_lock();
  94        ptbl = rcu_dereference(disk->part_tbl);
  95
  96        piter->disk = disk;
  97        piter->part = NULL;
  98
  99        if (flags & DISK_PITER_REVERSE)
 100                piter->idx = ptbl->len - 1;
 101        else if (flags & DISK_PITER_INCL_PART0)
 102                piter->idx = 0;
 103        else
 104                piter->idx = 1;
 105
 106        piter->flags = flags;
 107
 108        rcu_read_unlock();
 109}
 110EXPORT_SYMBOL_GPL(disk_part_iter_init);
 111
 112/**
 113 * disk_part_iter_next - proceed iterator to the next partition and return it
 114 * @piter: iterator of interest
 115 *
 116 * Proceed @piter to the next partition and return it.
 117 *
 118 * CONTEXT:
 119 * Don't care.
 120 */
 121struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 122{
 123        struct disk_part_tbl *ptbl;
 124        int inc, end;
 125
 126        /* put the last partition */
 127        disk_put_part(piter->part);
 128        piter->part = NULL;
 129
 130        /* get part_tbl */
 131        rcu_read_lock();
 132        ptbl = rcu_dereference(piter->disk->part_tbl);
 133
 134        /* determine iteration parameters */
 135        if (piter->flags & DISK_PITER_REVERSE) {
 136                inc = -1;
 137                if (piter->flags & DISK_PITER_INCL_PART0)
 138                        end = -1;
 139                else
 140                        end = 0;
 141        } else {
 142                inc = 1;
 143                end = ptbl->len;
 144        }
 145
 146        /* iterate to the next partition */
 147        for (; piter->idx != end; piter->idx += inc) {
 148                struct hd_struct *part;
 149
 150                part = rcu_dereference(ptbl->part[piter->idx]);
 151                if (!part)
 152                        continue;
 153                if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
 154                        continue;
 155
 156                get_device(part_to_dev(part));
 157                piter->part = part;
 158                piter->idx += inc;
 159                break;
 160        }
 161
 162        rcu_read_unlock();
 163
 164        return piter->part;
 165}
 166EXPORT_SYMBOL_GPL(disk_part_iter_next);
 167
/**
 * disk_part_iter_exit - finish up partition iteration
 * @piter: iter of interest
 *
 * Called when iteration is over.  Drops the reference on the partition
 * most recently returned by disk_part_iter_next(), if any, and clears
 * @piter->part so that a repeated call has nothing left to drop.
 *
 * CONTEXT:
 * Don't care.
 */
void disk_part_iter_exit(struct disk_part_iter *piter)
{
	disk_put_part(piter->part);
	piter->part = NULL;
}
EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 183
 184static inline int sector_in_part(struct hd_struct *part, sector_t sector)
 185{
 186        return part->start_sect <= sector &&
 187                sector < part->start_sect + part->nr_sects;
 188}
 189
 190/**
 191 * disk_map_sector_rcu - map sector to partition
 192 * @disk: gendisk of interest
 193 * @sector: sector to map
 194 *
 195 * Find out which partition @sector maps to on @disk.  This is
 196 * primarily used for stats accounting.
 197 *
 198 * CONTEXT:
 199 * RCU read locked.  The returned partition pointer is valid only
 200 * while preemption is disabled.
 201 *
 202 * RETURNS:
 203 * Found partition on success, part0 is returned if no partition matches
 204 */
 205struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 206{
 207        struct disk_part_tbl *ptbl;
 208        struct hd_struct *part;
 209        int i;
 210
 211        ptbl = rcu_dereference(disk->part_tbl);
 212
 213        part = rcu_dereference(ptbl->last_lookup);
 214        if (part && sector_in_part(part, sector))
 215                return part;
 216
 217        for (i = 1; i < ptbl->len; i++) {
 218                part = rcu_dereference(ptbl->part[i]);
 219
 220                if (part && sector_in_part(part, sector)) {
 221                        rcu_assign_pointer(ptbl->last_lookup, part);
 222                        return part;
 223                }
 224        }
 225        return &disk->part0;
 226}
 227EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
 228
/*
 * Can be deleted altogether. Later.
 *
 * Registered block majors, kept as a hash of singly linked chains.
 * The bucket for a major is chosen by major_to_index(); all accesses
 * in this file are made with block_class_lock held.
 */
static struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;			/* registered major number */
	char name[16];			/* driver name, truncated by strlcpy() */
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 238
 239/* index in the above - for now: assume no multimajor ranges */
 240static inline int major_to_index(int major)
 241{
 242        return major % BLKDEV_MAJOR_HASH_SIZE;
 243}
 244
 245#ifdef CONFIG_PROC_FS
 246void blkdev_show(struct seq_file *seqf, off_t offset)
 247{
 248        struct blk_major_name *dp;
 249
 250        if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 251                mutex_lock(&block_class_lock);
 252                for (dp = major_names[offset]; dp; dp = dp->next)
 253                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 254                mutex_unlock(&block_class_lock);
 255        }
 256}
 257#endif /* CONFIG_PROC_FS */
 258
 259/**
 260 * register_blkdev - register a new block device
 261 *
 262 * @major: the requested major device number [1..255]. If @major=0, try to
 263 *         allocate any unused major number.
 264 * @name: the name of the new block device as a zero terminated string
 265 *
 266 * The @name must be unique within the system.
 267 *
 268 * The return value depends on the @major input parameter.
 269 *  - if a major device number was requested in range [1..255] then the
 270 *    function returns zero on success, or a negative error code
 271 *  - if any unused major number was requested with @major=0 parameter
 272 *    then the return value is the allocated major number in range
 273 *    [1..255] or a negative error code otherwise
 274 */
 275int register_blkdev(unsigned int major, const char *name)
 276{
 277        struct blk_major_name **n, *p;
 278        int index, ret = 0;
 279
 280        mutex_lock(&block_class_lock);
 281
 282        /* temporary */
 283        if (major == 0) {
 284                for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
 285                        if (major_names[index] == NULL)
 286                                break;
 287                }
 288
 289                if (index == 0) {
 290                        printk("register_blkdev: failed to get major for %s\n",
 291                               name);
 292                        ret = -EBUSY;
 293                        goto out;
 294                }
 295                major = index;
 296                ret = major;
 297        }
 298
 299        p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
 300        if (p == NULL) {
 301                ret = -ENOMEM;
 302                goto out;
 303        }
 304
 305        p->major = major;
 306        strlcpy(p->name, name, sizeof(p->name));
 307        p->next = NULL;
 308        index = major_to_index(major);
 309
 310        for (n = &major_names[index]; *n; n = &(*n)->next) {
 311                if ((*n)->major == major)
 312                        break;
 313        }
 314        if (!*n)
 315                *n = p;
 316        else
 317                ret = -EBUSY;
 318
 319        if (ret < 0) {
 320                printk("register_blkdev: cannot get major %d for %s\n",
 321                       major, name);
 322                kfree(p);
 323        }
 324out:
 325        mutex_unlock(&block_class_lock);
 326        return ret;
 327}
 328
 329EXPORT_SYMBOL(register_blkdev);
 330
 331void unregister_blkdev(unsigned int major, const char *name)
 332{
 333        struct blk_major_name **n;
 334        struct blk_major_name *p = NULL;
 335        int index = major_to_index(major);
 336
 337        mutex_lock(&block_class_lock);
 338        for (n = &major_names[index]; *n; n = &(*n)->next)
 339                if ((*n)->major == major)
 340                        break;
 341        if (!*n || strcmp((*n)->name, name)) {
 342                WARN_ON(1);
 343        } else {
 344                p = *n;
 345                *n = p->next;
 346        }
 347        mutex_unlock(&block_class_lock);
 348        kfree(p);
 349}
 350
 351EXPORT_SYMBOL(unregister_blkdev);
 352
/* devt -> kobject map for block devices; set up in genhd_device_init() */
static struct kobj_map *bdev_map;
 354
 355/**
 356 * blk_mangle_minor - scatter minor numbers apart
 357 * @minor: minor number to mangle
 358 *
 359 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 360 * is enabled.  Mangling twice gives the original value.
 361 *
 362 * RETURNS:
 363 * Mangled value.
 364 *
 365 * CONTEXT:
 366 * Don't care.
 367 */
 368static int blk_mangle_minor(int minor)
 369{
 370#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
 371        int i;
 372
 373        for (i = 0; i < MINORBITS / 2; i++) {
 374                int low = minor & (1 << i);
 375                int high = minor & (1 << (MINORBITS - 1 - i));
 376                int distance = MINORBITS - 1 - 2 * i;
 377
 378                minor ^= low | high;    /* clear both bits */
 379                low <<= distance;       /* swap the positions */
 380                high >>= distance;
 381                minor |= low | high;    /* and set */
 382        }
 383#endif
 384        return minor;
 385}
 386
 387/**
 388 * blk_alloc_devt - allocate a dev_t for a partition
 389 * @part: partition to allocate dev_t for
 390 * @devt: out parameter for resulting dev_t
 391 *
 392 * Allocate a dev_t for block device.
 393 *
 394 * RETURNS:
 395 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 396 * failure.
 397 *
 398 * CONTEXT:
 399 * Might sleep.
 400 */
 401int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 402{
 403        struct gendisk *disk = part_to_disk(part);
 404        int idx, rc;
 405
 406        /* in consecutive minor range? */
 407        if (part->partno < disk->minors) {
 408                *devt = MKDEV(disk->major, disk->first_minor + part->partno);
 409                return 0;
 410        }
 411
 412        /* allocate ext devt */
 413        do {
 414                if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
 415                        return -ENOMEM;
 416                rc = idr_get_new(&ext_devt_idr, part, &idx);
 417        } while (rc == -EAGAIN);
 418
 419        if (rc)
 420                return rc;
 421
 422        if (idx > MAX_EXT_DEVT) {
 423                idr_remove(&ext_devt_idr, idx);
 424                return -EBUSY;
 425        }
 426
 427        *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
 428        return 0;
 429}
 430
 431/**
 432 * blk_free_devt - free a dev_t
 433 * @devt: dev_t to free
 434 *
 435 * Free @devt which was allocated using blk_alloc_devt().
 436 *
 437 * CONTEXT:
 438 * Might sleep.
 439 */
 440void blk_free_devt(dev_t devt)
 441{
 442        might_sleep();
 443
 444        if (devt == MKDEV(0, 0))
 445                return;
 446
 447        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
 448                mutex_lock(&ext_devt_mutex);
 449                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 450                mutex_unlock(&ext_devt_mutex);
 451        }
 452}
 453
 454static char *bdevt_str(dev_t devt, char *buf)
 455{
 456        if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
 457                char tbuf[BDEVT_SIZE];
 458                snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
 459                snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
 460        } else
 461                snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
 462
 463        return buf;
 464}
 465
/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 *
 * Thin wrapper: all arguments are passed unchanged to kobj_map() on
 * the block device map bdev_map.
 */
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *), void *data)
{
	kobj_map(bdev_map, devt, range, module, probe, lock, data);
}

EXPORT_SYMBOL(blk_register_region);
 479
/*
 * Counterpart of blk_register_region(): passes @devt and @range to
 * kobj_unmap() on bdev_map.
 */
void blk_unregister_region(dev_t devt, unsigned long range)
{
	kobj_unmap(bdev_map, devt, range);
}

EXPORT_SYMBOL(blk_unregister_region);
 486
 487static struct kobject *exact_match(dev_t devt, int *partno, void *data)
 488{
 489        struct gendisk *p = data;
 490
 491        return &disk_to_dev(p)->kobj;
 492}
 493
 494static int exact_lock(dev_t devt, void *data)
 495{
 496        struct gendisk *p = data;
 497
 498        if (!get_disk(p))
 499                return -1;
 500        return 0;
 501}
 502
 503/**
 504 * add_disk - add partitioning information to kernel list
 505 * @disk: per-device partitioning information
 506 *
 507 * This function registers the partitioning information in @disk
 508 * with the kernel.
 509 *
 510 * FIXME: error handling
 511 */
 512void add_disk(struct gendisk *disk)
 513{
 514        struct backing_dev_info *bdi;
 515        dev_t devt;
 516        int retval;
 517
 518        /* minors == 0 indicates to use ext devt from part0 and should
 519         * be accompanied with EXT_DEVT flag.  Make sure all
 520         * parameters make sense.
 521         */
 522        WARN_ON(disk->minors && !(disk->major || disk->first_minor));
 523        WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
 524
 525        disk->flags |= GENHD_FL_UP;
 526
 527        retval = blk_alloc_devt(&disk->part0, &devt);
 528        if (retval) {
 529                WARN_ON(1);
 530                return;
 531        }
 532        disk_to_dev(disk)->devt = devt;
 533
 534        /* ->major and ->first_minor aren't supposed to be
 535         * dereferenced from here on, but set them just in case.
 536         */
 537        disk->major = MAJOR(devt);
 538        disk->first_minor = MINOR(devt);
 539
 540        blk_register_region(disk_devt(disk), disk->minors, NULL,
 541                            exact_match, exact_lock, disk);
 542        register_disk(disk);
 543        blk_register_queue(disk);
 544
 545        bdi = &disk->queue->backing_dev_info;
 546        bdi_register_dev(bdi, disk_devt(disk));
 547        retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 548                                   "bdi");
 549        WARN_ON(retval);
 550}
 551
 552EXPORT_SYMBOL(add_disk);
 553EXPORT_SYMBOL(del_gendisk);     /* in partitions/check.c */
 554
/*
 * Undo the registrations made by add_disk(), in reverse order: remove
 * the "bdi" sysfs link, unregister the queue's backing_dev_info,
 * unregister the queue and finally drop the device number region.
 */
void unlink_gendisk(struct gendisk *disk)
{
	sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
	bdi_unregister(&disk->queue->backing_dev_info);
	blk_unregister_queue(disk);
	blk_unregister_region(disk_devt(disk), disk->minors);
}
 562
 563/**
 564 * get_gendisk - get partitioning information for a given device
 565 * @devt: device to get partitioning information for
 566 * @partno: returned partition index
 567 *
 568 * This function gets the structure containing partitioning
 569 * information for the given device @devt.
 570 */
 571struct gendisk *get_gendisk(dev_t devt, int *partno)
 572{
 573        struct gendisk *disk = NULL;
 574
 575        if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
 576                struct kobject *kobj;
 577
 578                kobj = kobj_lookup(bdev_map, devt, partno);
 579                if (kobj)
 580                        disk = dev_to_disk(kobj_to_dev(kobj));
 581        } else {
 582                struct hd_struct *part;
 583
 584                mutex_lock(&ext_devt_mutex);
 585                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 586                if (part && get_disk(part_to_disk(part))) {
 587                        *partno = part->partno;
 588                        disk = part_to_disk(part);
 589                }
 590                mutex_unlock(&ext_devt_mutex);
 591        }
 592
 593        return disk;
 594}
 595
 596/**
 597 * bdget_disk - do bdget() by gendisk and partition number
 598 * @disk: gendisk of interest
 599 * @partno: partition number
 600 *
 601 * Find partition @partno from @disk, do bdget() on it.
 602 *
 603 * CONTEXT:
 604 * Don't care.
 605 *
 606 * RETURNS:
 607 * Resulting block_device on success, NULL on failure.
 608 */
 609struct block_device *bdget_disk(struct gendisk *disk, int partno)
 610{
 611        struct hd_struct *part;
 612        struct block_device *bdev = NULL;
 613
 614        part = disk_get_part(disk, partno);
 615        if (part)
 616                bdev = bdget(part_devt(part));
 617        disk_put_part(part);
 618
 619        return bdev;
 620}
 621EXPORT_SYMBOL(bdget_disk);
 622
 623/*
 624 * print a full list of all partitions - intended for places where the root
 625 * filesystem can't be mounted and thus to give the victim some idea of what
 626 * went wrong
 627 */
 628void __init printk_all_partitions(void)
 629{
 630        struct class_dev_iter iter;
 631        struct device *dev;
 632
 633        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 634        while ((dev = class_dev_iter_next(&iter))) {
 635                struct gendisk *disk = dev_to_disk(dev);
 636                struct disk_part_iter piter;
 637                struct hd_struct *part;
 638                char name_buf[BDEVNAME_SIZE];
 639                char devt_buf[BDEVT_SIZE];
 640
 641                /*
 642                 * Don't show empty devices or things that have been
 643                 * surpressed
 644                 */
 645                if (get_capacity(disk) == 0 ||
 646                    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
 647                        continue;
 648
 649                /*
 650                 * Note, unlike /proc/partitions, I am showing the
 651                 * numbers in hex - the same format as the root=
 652                 * option takes.
 653                 */
 654                disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
 655                while ((part = disk_part_iter_next(&piter))) {
 656                        bool is_part0 = part == &disk->part0;
 657
 658                        printk("%s%s %10llu %s", is_part0 ? "" : "  ",
 659                               bdevt_str(part_devt(part), devt_buf),
 660                               (unsigned long long)part->nr_sects >> 1,
 661                               disk_name(disk, part->partno, name_buf));
 662                        if (is_part0) {
 663                                if (disk->driverfs_dev != NULL &&
 664                                    disk->driverfs_dev->driver != NULL)
 665                                        printk(" driver: %s\n",
 666                                              disk->driverfs_dev->driver->name);
 667                                else
 668                                        printk(" (driver?)\n");
 669                        } else
 670                                printk("\n");
 671                }
 672                disk_part_iter_exit(&piter);
 673        }
 674        class_dev_iter_exit(&iter);
 675}
 676
 677#ifdef CONFIG_PROC_FS
 678/* iterator */
 679static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 680{
 681        loff_t skip = *pos;
 682        struct class_dev_iter *iter;
 683        struct device *dev;
 684
 685        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 686        if (!iter)
 687                return ERR_PTR(-ENOMEM);
 688
 689        seqf->private = iter;
 690        class_dev_iter_init(iter, &block_class, NULL, &disk_type);
 691        do {
 692                dev = class_dev_iter_next(iter);
 693                if (!dev)
 694                        return NULL;
 695        } while (skip--);
 696
 697        return dev_to_disk(dev);
 698}
 699
 700static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 701{
 702        struct device *dev;
 703
 704        (*pos)++;
 705        dev = class_dev_iter_next(seqf->private);
 706        if (dev)
 707                return dev_to_disk(dev);
 708
 709        return NULL;
 710}
 711
 712static void disk_seqf_stop(struct seq_file *seqf, void *v)
 713{
 714        struct class_dev_iter *iter = seqf->private;
 715
 716        /* stop is called even after start failed :-( */
 717        if (iter) {
 718                class_dev_iter_exit(iter);
 719                kfree(iter);
 720        }
 721}
 722
 723static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 724{
 725        static void *p;
 726
 727        p = disk_seqf_start(seqf, pos);
 728        if (!IS_ERR(p) && p && !*pos)
 729                seq_puts(seqf, "major minor  #blocks  name\n\n");
 730        return p;
 731}
 732
 733static int show_partition(struct seq_file *seqf, void *v)
 734{
 735        struct gendisk *sgp = v;
 736        struct disk_part_iter piter;
 737        struct hd_struct *part;
 738        char buf[BDEVNAME_SIZE];
 739
 740        /* Don't show non-partitionable removeable devices or empty devices */
 741        if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
 742                                   (sgp->flags & GENHD_FL_REMOVABLE)))
 743                return 0;
 744        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 745                return 0;
 746
 747        /* show the full disk and all non-0 size partitions of it */
 748        disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
 749        while ((part = disk_part_iter_next(&piter)))
 750                seq_printf(seqf, "%4d  %7d %10llu %s\n",
 751                           MAJOR(part_devt(part)), MINOR(part_devt(part)),
 752                           (unsigned long long)part->nr_sects >> 1,
 753                           disk_name(sgp, part->partno, buf));
 754        disk_part_iter_exit(&piter);
 755
 756        return 0;
 757}
 758
/*
 * seq_file callbacks for the partition listing: the generic disk
 * iterator supplies next/stop, show_partition_start adds the header.
 */
static const struct seq_operations partitions_op = {
	.start	= show_partition_start,
	.next	= disk_seqf_next,
	.stop	= disk_seqf_stop,
	.show	= show_partition
};
 765
/* ->open handler: attach the partitions_op seq_file callbacks to @file. */
static int partitions_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &partitions_op);
}
 770
/*
 * File operations built on the seq_file helpers; only ->open is
 * specific to the partition listing.
 */
static const struct file_operations proc_partitions_operations = {
	.open		= partitions_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
 777#endif
 778
 779
 780static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 781{
 782        if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 783                /* Make old-style 2.4 aliases work */
 784                request_module("block-major-%d", MAJOR(devt));
 785        return NULL;
 786}
 787
 788static int __init genhd_device_init(void)
 789{
 790        int error;
 791
 792        block_class.dev_kobj = sysfs_dev_block_kobj;
 793        error = class_register(&block_class);
 794        if (unlikely(error))
 795                return error;
 796        bdev_map = kobj_map_init(base_probe, &block_class_lock);
 797        blk_dev_init();
 798
 799        register_blkdev(BLOCK_EXT_MAJOR, "blkext");
 800
 801#ifndef CONFIG_SYSFS_DEPRECATED
 802        /* create top-level block dir */
 803        block_depr = kobject_create_and_add("block", NULL);
 804#endif
 805        return 0;
 806}
 807
 808subsys_initcall(genhd_device_init);
 809
 810static ssize_t disk_range_show(struct device *dev,
 811                               struct device_attribute *attr, char *buf)
 812{
 813        struct gendisk *disk = dev_to_disk(dev);
 814
 815        return sprintf(buf, "%d\n", disk->minors);
 816}
 817
 818static ssize_t disk_ext_range_show(struct device *dev,
 819                                   struct device_attribute *attr, char *buf)
 820{
 821        struct gendisk *disk = dev_to_disk(dev);
 822
 823        return sprintf(buf, "%d\n", disk_max_parts(disk));
 824}
 825
 826static ssize_t disk_removable_show(struct device *dev,
 827                                   struct device_attribute *attr, char *buf)
 828{
 829        struct gendisk *disk = dev_to_disk(dev);
 830
 831        return sprintf(buf, "%d\n",
 832                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 833}
 834
 835static ssize_t disk_ro_show(struct device *dev,
 836                                   struct device_attribute *attr, char *buf)
 837{
 838        struct gendisk *disk = dev_to_disk(dev);
 839
 840        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 841}
 842
 843static ssize_t disk_capability_show(struct device *dev,
 844                                    struct device_attribute *attr, char *buf)
 845{
 846        struct gendisk *disk = dev_to_disk(dev);
 847
 848        return sprintf(buf, "%x\n", disk->flags);
 849}
 850
/*
 * Read-only (S_IRUGO) attributes of a disk device.  "size" and "stat"
 * use the part_*_show handlers, which are not defined in this file.
 */
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
/*
 * The two optional attributes below are also writable by the owner.
 * Their names contain dashes, so they are spelled out with __ATTR()
 * rather than DEVICE_ATTR().
 */
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
		part_timeout_store);
#endif
 867
/* NULL-terminated list of the disk attributes declared above. */
static struct attribute *disk_attrs[] = {
	&dev_attr_range.attr,
	&dev_attr_ext_range.attr,
	&dev_attr_removable.attr,
	&dev_attr_ro.attr,
	&dev_attr_size.attr,
	&dev_attr_capability.attr,
	&dev_attr_stat.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
	&dev_attr_fail_timeout.attr,
#endif
	NULL
};
 884
/* Unnamed attribute group wrapping disk_attrs. */
static struct attribute_group disk_attr_group = {
	.attrs = disk_attrs,
};

/* NULL-terminated group list referenced by disk_type.groups. */
static struct attribute_group *disk_attr_groups[] = {
	&disk_attr_group,
	NULL
};
 893
 894static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
 895{
 896        struct disk_part_tbl *ptbl =
 897                container_of(head, struct disk_part_tbl, rcu_head);
 898
 899        kfree(ptbl);
 900}
 901
 902/**
 903 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
 904 * @disk: disk to replace part_tbl for
 905 * @new_ptbl: new part_tbl to install
 906 *
 907 * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
 908 * original ptbl is freed using RCU callback.
 909 *
 910 * LOCKING:
 911 * Matching bd_mutx locked.
 912 */
 913static void disk_replace_part_tbl(struct gendisk *disk,
 914                                  struct disk_part_tbl *new_ptbl)
 915{
 916        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 917
 918        rcu_assign_pointer(disk->part_tbl, new_ptbl);
 919
 920        if (old_ptbl) {
 921                rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 922                call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
 923        }
 924}
 925
 926/**
 927 * disk_expand_part_tbl - expand disk->part_tbl
 928 * @disk: disk to expand part_tbl for
 929 * @partno: expand such that this partno can fit in
 930 *
 931 * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
 932 * uses RCU to allow unlocked dereferencing for stats and other stuff.
 933 *
 934 * LOCKING:
 935 * Matching bd_mutex locked, might sleep.
 936 *
 937 * RETURNS:
 938 * 0 on success, -errno on failure.
 939 */
 940int disk_expand_part_tbl(struct gendisk *disk, int partno)
 941{
 942        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 943        struct disk_part_tbl *new_ptbl;
 944        int len = old_ptbl ? old_ptbl->len : 0;
 945        int target = partno + 1;
 946        size_t size;
 947        int i;
 948
 949        /* disk_max_parts() is zero during initialization, ignore if so */
 950        if (disk_max_parts(disk) && target > disk_max_parts(disk))
 951                return -EINVAL;
 952
 953        if (target <= len)
 954                return 0;
 955
 956        size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
 957        new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
 958        if (!new_ptbl)
 959                return -ENOMEM;
 960
 961        INIT_RCU_HEAD(&new_ptbl->rcu_head);
 962        new_ptbl->len = target;
 963
 964        for (i = 0; i < len; i++)
 965                rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
 966
 967        disk_replace_part_tbl(disk, new_ptbl);
 968        return 0;
 969}
 970
/*
 * ->release callback of disk_type: frees what hangs off the gendisk
 * (->random, the partition table, the part0 stats) and then the
 * gendisk itself.
 */
static void disk_release(struct device *dev)
{
	struct gendisk *disk = dev_to_disk(dev);

	kfree(disk->random);
	/* installing NULL hands the current table to the RCU free callback */
	disk_replace_part_tbl(disk, NULL);
	free_part_stats(&disk->part0);
	kfree(disk);
}
/* The "block" device class; registered in genhd_device_init(). */
struct class block_class = {
	.name		= "block",
};
 983
 984static struct device_type disk_type = {
 985        .name           = "disk",
 986        .groups         = disk_attr_groups,
 987        .release        = disk_release,
 988};
 989
 990#ifdef CONFIG_PROC_FS
 991/*
 992 * aggregate disk stat collector.  Uses the same stats that the sysfs
 993 * entries do, above, but makes them available through one seq_file.
 994 *
 995 * The output looks suspiciously like /proc/partitions with a bunch of
 996 * extra fields.
 997 */
 998static int diskstats_show(struct seq_file *seqf, void *v)
 999{
1000        struct gendisk *gp = v;
1001        struct disk_part_iter piter;
1002        struct hd_struct *hd;
1003        char buf[BDEVNAME_SIZE];
1004        int cpu;
1005
1006        /*
1007        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1008                seq_puts(seqf,  "major minor name"
1009                                "     rio rmerge rsect ruse wio wmerge "
1010                                "wsect wuse running use aveq"
1011                                "\n\n");
1012        */
1013 
1014        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
1015        while ((hd = disk_part_iter_next(&piter))) {
1016                cpu = part_stat_lock();
1017                part_round_stats(cpu, hd);
1018                part_stat_unlock();
1019                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
1020                           "%u %lu %lu %llu %u %u %u %u\n",
1021                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1022                           disk_name(gp, hd->partno, buf),
1023                           part_stat_read(hd, ios[0]),
1024                           part_stat_read(hd, merges[0]),
1025                           (unsigned long long)part_stat_read(hd, sectors[0]),
1026                           jiffies_to_msecs(part_stat_read(hd, ticks[0])),
1027                           part_stat_read(hd, ios[1]),
1028                           part_stat_read(hd, merges[1]),
1029                           (unsigned long long)part_stat_read(hd, sectors[1]),
1030                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1031                           hd->in_flight,
1032                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1033                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1034                        );
1035        }
1036        disk_part_iter_exit(&piter);
1037 
1038        return 0;
1039}
1040
1041static const struct seq_operations diskstats_op = {
1042        .start  = disk_seqf_start,
1043        .next   = disk_seqf_next,
1044        .stop   = disk_seqf_stop,
1045        .show   = diskstats_show
1046};
1047
1048static int diskstats_open(struct inode *inode, struct file *file)
1049{
1050        return seq_open(file, &diskstats_op);
1051}
1052
1053static const struct file_operations proc_diskstats_operations = {
1054        .open           = diskstats_open,
1055        .read           = seq_read,
1056        .llseek         = seq_lseek,
1057        .release        = seq_release,
1058};
1059
1060static int __init proc_genhd_init(void)
1061{
1062        proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
1063        proc_create("partitions", 0, NULL, &proc_partitions_operations);
1064        return 0;
1065}
1066module_init(proc_genhd_init);
1067#endif /* CONFIG_PROC_FS */
1068
1069static void media_change_notify_thread(struct work_struct *work)
1070{
1071        struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1072        char event[] = "MEDIA_CHANGE=1";
1073        char *envp[] = { event, NULL };
1074
1075        /*
1076         * set enviroment vars to indicate which event this is for
1077         * so that user space will know to go check the media status.
1078         */
1079        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1080        put_device(gd->driverfs_dev);
1081}
1082
#if 0
/*
 * Compiled out.  Takes a reference on disk->driverfs_dev and schedules
 * disk->async_notify; the work handler, media_change_notify_thread(),
 * drops that reference again.
 */
void genhd_media_change_notify(struct gendisk *disk)
{
        get_device(disk->driverfs_dev);
        schedule_work(&disk->async_notify);
}
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif  /*  0  */
1091
1092dev_t blk_lookup_devt(const char *name, int partno)
1093{
1094        dev_t devt = MKDEV(0, 0);
1095        struct class_dev_iter iter;
1096        struct device *dev;
1097
1098        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1099        while ((dev = class_dev_iter_next(&iter))) {
1100                struct gendisk *disk = dev_to_disk(dev);
1101                struct hd_struct *part;
1102
1103                if (strcmp(dev_name(dev), name))
1104                        continue;
1105
1106                if (partno < disk->minors) {
1107                        /* We need to return the right devno, even
1108                         * if the partition doesn't exist yet.
1109                         */
1110                        devt = MKDEV(MAJOR(dev->devt),
1111                                     MINOR(dev->devt) + partno);
1112                        break;
1113                }
1114                part = disk_get_part(disk, partno);
1115                if (part) {
1116                        devt = part_devt(part);
1117                        disk_put_part(part);
1118                        break;
1119                }
1120                disk_put_part(part);
1121        }
1122        class_dev_iter_exit(&iter);
1123        return devt;
1124}
1125EXPORT_SYMBOL(blk_lookup_devt);
1126
/*
 * Allocate a gendisk with @minors minors.  Same as alloc_disk_node()
 * with a node id of -1.
 */
struct gendisk *alloc_disk(int minors)
{
        const int node_id = -1;

        return alloc_disk_node(minors, node_id);
}
EXPORT_SYMBOL(alloc_disk);
1132
1133struct gendisk *alloc_disk_node(int minors, int node_id)
1134{
1135        struct gendisk *disk;
1136
1137        disk = kmalloc_node(sizeof(struct gendisk),
1138                                GFP_KERNEL | __GFP_ZERO, node_id);
1139        if (disk) {
1140                if (!init_part_stats(&disk->part0)) {
1141                        kfree(disk);
1142                        return NULL;
1143                }
1144                disk->node_id = node_id;
1145                if (disk_expand_part_tbl(disk, 0)) {
1146                        free_part_stats(&disk->part0);
1147                        kfree(disk);
1148                        return NULL;
1149                }
1150                disk->part_tbl->part[0] = &disk->part0;
1151
1152                disk->minors = minors;
1153                rand_initialize_disk(disk);
1154                disk_to_dev(disk)->class = &block_class;
1155                disk_to_dev(disk)->type = &disk_type;
1156                device_initialize(disk_to_dev(disk));
1157                INIT_WORK(&disk->async_notify,
1158                        media_change_notify_thread);
1159        }
1160        return disk;
1161}
1162EXPORT_SYMBOL(alloc_disk_node);
1163
1164struct kobject *get_disk(struct gendisk *disk)
1165{
1166        struct module *owner;
1167        struct kobject *kobj;
1168
1169        if (!disk->fops)
1170                return NULL;
1171        owner = disk->fops->owner;
1172        if (owner && !try_module_get(owner))
1173                return NULL;
1174        kobj = kobject_get(&disk_to_dev(disk)->kobj);
1175        if (kobj == NULL) {
1176                module_put(owner);
1177                return NULL;
1178        }
1179        return kobj;
1180
1181}
1182
1183EXPORT_SYMBOL(get_disk);
1184
1185void put_disk(struct gendisk *disk)
1186{
1187        if (disk)
1188                kobject_put(&disk_to_dev(disk)->kobj);
1189}
1190
1191EXPORT_SYMBOL(put_disk);
1192
1193void set_device_ro(struct block_device *bdev, int flag)
1194{
1195        bdev->bd_part->policy = flag;
1196}
1197
1198EXPORT_SYMBOL(set_device_ro);
1199
1200void set_disk_ro(struct gendisk *disk, int flag)
1201{
1202        struct disk_part_iter piter;
1203        struct hd_struct *part;
1204
1205        disk_part_iter_init(&piter, disk,
1206                            DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
1207        while ((part = disk_part_iter_next(&piter)))
1208                part->policy = flag;
1209        disk_part_iter_exit(&piter);
1210}
1211
1212EXPORT_SYMBOL(set_disk_ro);
1213
1214int bdev_read_only(struct block_device *bdev)
1215{
1216        if (!bdev)
1217                return 0;
1218        return bdev->bd_part->policy;
1219}
1220
1221EXPORT_SYMBOL(bdev_read_only);
1222
/*
 * Sync and invalidate the block device of partition @partno on @disk.
 *
 * Returns the result of __invalidate_device(), or 0 when bdget_disk()
 * yields no block device.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
        struct block_device *bdev = bdget_disk(disk, partno);
        int res;

        if (!bdev)
                return 0;

        fsync_bdev(bdev);
        res = __invalidate_device(bdev);
        bdput(bdev);    /* release the reference from bdget_disk() */

        return res;
}

EXPORT_SYMBOL(invalidate_partition);
1236
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.