linux/drivers/block/loop.c
<<
>>
Prefs
   1/*
   2 *  linux/drivers/block/loop.c
   3 *
   4 *  Written by Theodore Ts'o, 3/29/93
   5 *
   6 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
   7 * permitted under the GNU General Public License.
   8 *
   9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
  10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
  11 *
  12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
  13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
  14 *
  15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
  16 *
  17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
  18 *
  19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
  20 *
  21 * Loadable modules and other fixes by AK, 1998
  22 *
  23 * Make real block number available to downstream transfer functions, enables
  24 * CBC (and relatives) mode encryption requiring unique IVs per data block.
  25 * Reed H. Petty, rhp@draper.net
  26 *
  27 * Maximum number of loop devices now dynamic via max_loop module parameter.
  28 * Russell Kroll <rkroll@exploits.org> 19990701
  29 *
  30 * Maximum number of loop devices when compiled-in now selectable by passing
  31 * max_loop=<1-255> to the kernel on boot.
   32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
  33 *
  34 * Completely rewrite request handling to be make_request_fn style and
  35 * non blocking, pushing work to a helper thread. Lots of fixes from
  36 * Al Viro too.
  37 * Jens Axboe <axboe@suse.de>, Nov 2000
  38 *
  39 * Support up to 256 loop devices
  40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
  41 *
  42 * Support for falling back on the write file operation when the address space
  43 * operations prepare_write and/or commit_write are not available on the
  44 * backing filesystem.
  45 * Anton Altaparmakov, 16 Feb 2005
  46 *
  47 * Still To Fix:
  48 * - Advisory locking is ignored here.
  49 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  50 *
  51 */
  52
  53#include <linux/module.h>
  54#include <linux/moduleparam.h>
  55#include <linux/sched.h>
  56#include <linux/fs.h>
  57#include <linux/file.h>
  58#include <linux/stat.h>
  59#include <linux/errno.h>
  60#include <linux/major.h>
  61#include <linux/wait.h>
  62#include <linux/blkdev.h>
  63#include <linux/blkpg.h>
  64#include <linux/init.h>
  65#include <linux/smp_lock.h>
  66#include <linux/swap.h>
  67#include <linux/slab.h>
  68#include <linux/loop.h>
  69#include <linux/compat.h>
  70#include <linux/suspend.h>
  71#include <linux/freezer.h>
  72#include <linux/writeback.h>
  73#include <linux/buffer_head.h>          /* for invalidate_bdev() */
  74#include <linux/completion.h>
  75#include <linux/highmem.h>
  76#include <linux/gfp.h>
  77#include <linux/kthread.h>
  78#include <linux/splice.h>
  79
  80#include <asm/uaccess.h>
  81
  82static LIST_HEAD(loop_devices);
  83static DEFINE_MUTEX(loop_devices_mutex);
  84
  85static int max_part;
  86static int part_shift;
  87
  88/*
  89 * Transfer functions
  90 */
  91static int transfer_none(struct loop_device *lo, int cmd,
  92                         struct page *raw_page, unsigned raw_off,
  93                         struct page *loop_page, unsigned loop_off,
  94                         int size, sector_t real_block)
  95{
  96        char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  97        char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  98
  99        if (cmd == READ)
 100                memcpy(loop_buf, raw_buf, size);
 101        else
 102                memcpy(raw_buf, loop_buf, size);
 103
 104        kunmap_atomic(raw_buf, KM_USER0);
 105        kunmap_atomic(loop_buf, KM_USER1);
 106        cond_resched();
 107        return 0;
 108}
 109
 110static int transfer_xor(struct loop_device *lo, int cmd,
 111                        struct page *raw_page, unsigned raw_off,
 112                        struct page *loop_page, unsigned loop_off,
 113                        int size, sector_t real_block)
 114{
 115        char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
 116        char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 117        char *in, *out, *key;
 118        int i, keysize;
 119
 120        if (cmd == READ) {
 121                in = raw_buf;
 122                out = loop_buf;
 123        } else {
 124                in = loop_buf;
 125                out = raw_buf;
 126        }
 127
 128        key = lo->lo_encrypt_key;
 129        keysize = lo->lo_encrypt_key_size;
 130        for (i = 0; i < size; i++)
 131                *out++ = *in++ ^ key[(i & 511) % keysize];
 132
 133        kunmap_atomic(raw_buf, KM_USER0);
 134        kunmap_atomic(loop_buf, KM_USER1);
 135        cond_resched();
 136        return 0;
 137}
 138
 139static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
 140{
 141        if (unlikely(info->lo_encrypt_key_size <= 0))
 142                return -EINVAL;
 143        return 0;
 144}
 145
 146static struct loop_func_table none_funcs = {
 147        .number = LO_CRYPT_NONE,
 148        .transfer = transfer_none,
 149};      
 150
 151static struct loop_func_table xor_funcs = {
 152        .number = LO_CRYPT_XOR,
 153        .transfer = transfer_xor,
 154        .init = xor_init
 155};      
 156
 157/* xfer_funcs[0] is special - its release function is never called */
 158static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 159        &none_funcs,
 160        &xor_funcs
 161};
 162
 163static loff_t get_loop_size(struct loop_device *lo, struct file *file)
 164{
 165        loff_t size, offset, loopsize;
 166
 167        /* Compute loopsize in bytes */
 168        size = i_size_read(file->f_mapping->host);
 169        offset = lo->lo_offset;
 170        loopsize = size - offset;
 171        if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
 172                loopsize = lo->lo_sizelimit;
 173
 174        /*
 175         * Unfortunately, if we want to do I/O on the device,
 176         * the number of 512-byte sectors has to fit into a sector_t.
 177         */
 178        return loopsize >> 9;
 179}
 180
 181static int
 182figure_loop_size(struct loop_device *lo)
 183{
 184        loff_t size = get_loop_size(lo, lo->lo_backing_file);
 185        sector_t x = (sector_t)size;
 186
 187        if (unlikely((loff_t)x != size))
 188                return -EFBIG;
 189
 190        set_capacity(lo->lo_disk, x);
 191        return 0;                                       
 192}
 193
 194static inline int
 195lo_do_transfer(struct loop_device *lo, int cmd,
 196               struct page *rpage, unsigned roffs,
 197               struct page *lpage, unsigned loffs,
 198               int size, sector_t rblock)
 199{
 200        if (unlikely(!lo->transfer))
 201                return 0;
 202
 203        return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 204}
 205
 206/**
 207 * do_lo_send_aops - helper for writing data to a loop device
 208 *
 209 * This is the fast version for backing filesystems which implement the address
 210 * space operations write_begin and write_end.
 211 */
 212static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 213                int bsize, loff_t pos, struct page *unused)
 214{
 215        struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 216        struct address_space *mapping = file->f_mapping;
 217        pgoff_t index;
 218        unsigned offset, bv_offs;
 219        int len, ret;
 220
 221        mutex_lock(&mapping->host->i_mutex);
 222        index = pos >> PAGE_CACHE_SHIFT;
 223        offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
 224        bv_offs = bvec->bv_offset;
 225        len = bvec->bv_len;
 226        while (len > 0) {
 227                sector_t IV;
 228                unsigned size, copied;
 229                int transfer_result;
 230                struct page *page;
 231                void *fsdata;
 232
 233                IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
 234                size = PAGE_CACHE_SIZE - offset;
 235                if (size > len)
 236                        size = len;
 237
 238                ret = pagecache_write_begin(file, mapping, pos, size, 0,
 239                                                        &page, &fsdata);
 240                if (ret)
 241                        goto fail;
 242
 243                transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 244                                bvec->bv_page, bv_offs, size, IV);
 245                copied = size;
 246                if (unlikely(transfer_result))
 247                        copied = 0;
 248
 249                ret = pagecache_write_end(file, mapping, pos, size, copied,
 250                                                        page, fsdata);
 251                if (ret < 0 || ret != copied)
 252                        goto fail;
 253
 254                if (unlikely(transfer_result))
 255                        goto fail;
 256
 257                bv_offs += copied;
 258                len -= copied;
 259                offset = 0;
 260                index++;
 261                pos += copied;
 262        }
 263        ret = 0;
 264out:
 265        mutex_unlock(&mapping->host->i_mutex);
 266        return ret;
 267fail:
 268        ret = -1;
 269        goto out;
 270}
 271
 272/**
 273 * __do_lo_send_write - helper for writing data to a loop device
 274 *
 275 * This helper just factors out common code between do_lo_send_direct_write()
 276 * and do_lo_send_write().
 277 */
 278static int __do_lo_send_write(struct file *file,
 279                u8 *buf, const int len, loff_t pos)
 280{
 281        ssize_t bw;
 282        mm_segment_t old_fs = get_fs();
 283
 284        set_fs(get_ds());
 285        bw = file->f_op->write(file, buf, len, &pos);
 286        set_fs(old_fs);
 287        if (likely(bw == len))
 288                return 0;
 289        printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
 290                        (unsigned long long)pos, len);
 291        if (bw >= 0)
 292                bw = -EIO;
 293        return bw;
 294}
 295
 296/**
 297 * do_lo_send_direct_write - helper for writing data to a loop device
 298 *
 299 * This is the fast, non-transforming version for backing filesystems which do
 300 * not implement the address space operations write_begin and write_end.
 301 * It uses the write file operation which should be present on all writeable
 302 * filesystems.
 303 */
 304static int do_lo_send_direct_write(struct loop_device *lo,
 305                struct bio_vec *bvec, int bsize, loff_t pos, struct page *page)
 306{
 307        ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
 308                        kmap(bvec->bv_page) + bvec->bv_offset,
 309                        bvec->bv_len, pos);
 310        kunmap(bvec->bv_page);
 311        cond_resched();
 312        return bw;
 313}
 314
 315/**
 316 * do_lo_send_write - helper for writing data to a loop device
 317 *
 318 * This is the slow, transforming version for filesystems which do not
 319 * implement the address space operations write_begin and write_end.  It
 320 * uses the write file operation which should be present on all writeable
 321 * filesystems.
 322 *
 323 * Using fops->write is slower than using aops->{prepare,commit}_write in the
 324 * transforming case because we need to double buffer the data as we cannot do
 325 * the transformations in place as we do not have direct access to the
 326 * destination pages of the backing file.
 327 */
 328static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
 329                int bsize, loff_t pos, struct page *page)
 330{
 331        int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
 332                        bvec->bv_offset, bvec->bv_len, pos >> 9);
 333        if (likely(!ret))
 334                return __do_lo_send_write(lo->lo_backing_file,
 335                                page_address(page), bvec->bv_len,
 336                                pos);
 337        printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
 338                        "length %i.\n", (unsigned long long)pos, bvec->bv_len);
 339        if (ret > 0)
 340                ret = -EIO;
 341        return ret;
 342}
 343
 344static int lo_send(struct loop_device *lo, struct bio *bio, int bsize,
 345                loff_t pos)
 346{
 347        int (*do_lo_send)(struct loop_device *, struct bio_vec *, int, loff_t,
 348                        struct page *page);
 349        struct bio_vec *bvec;
 350        struct page *page = NULL;
 351        int i, ret = 0;
 352
 353        do_lo_send = do_lo_send_aops;
 354        if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
 355                do_lo_send = do_lo_send_direct_write;
 356                if (lo->transfer != transfer_none) {
 357                        page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
 358                        if (unlikely(!page))
 359                                goto fail;
 360                        kmap(page);
 361                        do_lo_send = do_lo_send_write;
 362                }
 363        }
 364        bio_for_each_segment(bvec, bio, i) {
 365                ret = do_lo_send(lo, bvec, bsize, pos, page);
 366                if (ret < 0)
 367                        break;
 368                pos += bvec->bv_len;
 369        }
 370        if (page) {
 371                kunmap(page);
 372                __free_page(page);
 373        }
 374out:
 375        return ret;
 376fail:
 377        printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
 378        ret = -ENOMEM;
 379        goto out;
 380}
 381
 382struct lo_read_data {
 383        struct loop_device *lo;
 384        struct page *page;
 385        unsigned offset;
 386        int bsize;
 387};
 388
 389static int
 390lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 391                struct splice_desc *sd)
 392{
 393        struct lo_read_data *p = sd->u.data;
 394        struct loop_device *lo = p->lo;
 395        struct page *page = buf->page;
 396        sector_t IV;
 397        size_t size;
 398        int ret;
 399
 400        ret = buf->ops->confirm(pipe, buf);
 401        if (unlikely(ret))
 402                return ret;
 403
 404        IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
 405                                                        (buf->offset >> 9);
 406        size = sd->len;
 407        if (size > p->bsize)
 408                size = p->bsize;
 409
 410        if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
 411                printk(KERN_ERR "loop: transfer error block %ld\n",
 412                       page->index);
 413                size = -EINVAL;
 414        }
 415
 416        flush_dcache_page(p->page);
 417
 418        if (size > 0)
 419                p->offset += size;
 420
 421        return size;
 422}
 423
 424static int
 425lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
 426{
 427        return __splice_from_pipe(pipe, sd, lo_splice_actor);
 428}
 429
 430static int
 431do_lo_receive(struct loop_device *lo,
 432              struct bio_vec *bvec, int bsize, loff_t pos)
 433{
 434        struct lo_read_data cookie;
 435        struct splice_desc sd;
 436        struct file *file;
 437        long retval;
 438
 439        cookie.lo = lo;
 440        cookie.page = bvec->bv_page;
 441        cookie.offset = bvec->bv_offset;
 442        cookie.bsize = bsize;
 443
 444        sd.len = 0;
 445        sd.total_len = bvec->bv_len;
 446        sd.flags = 0;
 447        sd.pos = pos;
 448        sd.u.data = &cookie;
 449
 450        file = lo->lo_backing_file;
 451        retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
 452
 453        if (retval < 0)
 454                return retval;
 455
 456        return 0;
 457}
 458
 459static int
 460lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 461{
 462        struct bio_vec *bvec;
 463        int i, ret = 0;
 464
 465        bio_for_each_segment(bvec, bio, i) {
 466                ret = do_lo_receive(lo, bvec, bsize, pos);
 467                if (ret < 0)
 468                        break;
 469                pos += bvec->bv_len;
 470        }
 471        return ret;
 472}
 473
 474static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 475{
 476        loff_t pos;
 477        int ret;
 478
 479        pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
 480        if (bio_rw(bio) == WRITE)
 481                ret = lo_send(lo, bio, lo->lo_blocksize, pos);
 482        else
 483                ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
 484        return ret;
 485}
 486
 487/*
 488 * Add bio to back of pending list
 489 */
 490static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 491{
 492        if (lo->lo_biotail) {
 493                lo->lo_biotail->bi_next = bio;
 494                lo->lo_biotail = bio;
 495        } else
 496                lo->lo_bio = lo->lo_biotail = bio;
 497}
 498
 499/*
 500 * Grab first pending buffer
 501 */
 502static struct bio *loop_get_bio(struct loop_device *lo)
 503{
 504        struct bio *bio;
 505
 506        if ((bio = lo->lo_bio)) {
 507                if (bio == lo->lo_biotail)
 508                        lo->lo_biotail = NULL;
 509                lo->lo_bio = bio->bi_next;
 510                bio->bi_next = NULL;
 511        }
 512
 513        return bio;
 514}
 515
 516static int loop_make_request(struct request_queue *q, struct bio *old_bio)
 517{
 518        struct loop_device *lo = q->queuedata;
 519        int rw = bio_rw(old_bio);
 520
 521        if (rw == READA)
 522                rw = READ;
 523
 524        BUG_ON(!lo || (rw != READ && rw != WRITE));
 525
 526        spin_lock_irq(&lo->lo_lock);
 527        if (lo->lo_state != Lo_bound)
 528                goto out;
 529        if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 530                goto out;
 531        loop_add_bio(lo, old_bio);
 532        wake_up(&lo->lo_event);
 533        spin_unlock_irq(&lo->lo_lock);
 534        return 0;
 535
 536out:
 537        spin_unlock_irq(&lo->lo_lock);
 538        bio_io_error(old_bio);
 539        return 0;
 540}
 541
 542/*
 543 * kick off io on the underlying address space
 544 */
 545static void loop_unplug(struct request_queue *q)
 546{
 547        struct loop_device *lo = q->queuedata;
 548
 549        queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
 550        blk_run_address_space(lo->lo_backing_file->f_mapping);
 551}
 552
 553struct switch_request {
 554        struct file *file;
 555        struct completion wait;
 556};
 557
 558static void do_loop_switch(struct loop_device *, struct switch_request *);
 559
 560static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 561{
 562        if (unlikely(!bio->bi_bdev)) {
 563                do_loop_switch(lo, bio->bi_private);
 564                bio_put(bio);
 565        } else {
 566                int ret = do_bio_filebacked(lo, bio);
 567                bio_endio(bio, ret);
 568        }
 569}
 570
 571/*
 572 * worker thread that handles reads/writes to file backed loop devices,
 573 * to avoid blocking in our make_request_fn. it also does loop decrypting
 574 * on reads for block backed loop, as that is too heavy to do from
 575 * b_end_io context where irqs may be disabled.
 576 *
 577 * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
 578 * calling kthread_stop().  Therefore once kthread_should_stop() is
 579 * true, make_request will not place any more requests.  Therefore
 580 * once kthread_should_stop() is true and lo_bio is NULL, we are
 581 * done with the loop.
 582 */
 583static int loop_thread(void *data)
 584{
 585        struct loop_device *lo = data;
 586        struct bio *bio;
 587
 588        set_user_nice(current, -20);
 589
 590        while (!kthread_should_stop() || lo->lo_bio) {
 591
 592                wait_event_interruptible(lo->lo_event,
 593                                lo->lo_bio || kthread_should_stop());
 594
 595                if (!lo->lo_bio)
 596                        continue;
 597                spin_lock_irq(&lo->lo_lock);
 598                bio = loop_get_bio(lo);
 599                spin_unlock_irq(&lo->lo_lock);
 600
 601                BUG_ON(!bio);
 602                loop_handle_bio(lo, bio);
 603        }
 604
 605        return 0;
 606}
 607
 608/*
 609 * loop_switch performs the hard work of switching a backing store.
 610 * First it needs to flush existing IO, it does this by sending a magic
 611 * BIO down the pipe. The completion of this BIO does the actual switch.
 612 */
 613static int loop_switch(struct loop_device *lo, struct file *file)
 614{
 615        struct switch_request w;
 616        struct bio *bio = bio_alloc(GFP_KERNEL, 0);
 617        if (!bio)
 618                return -ENOMEM;
 619        init_completion(&w.wait);
 620        w.file = file;
 621        bio->bi_private = &w;
 622        bio->bi_bdev = NULL;
 623        loop_make_request(lo->lo_queue, bio);
 624        wait_for_completion(&w.wait);
 625        return 0;
 626}
 627
 628/*
 629 * Do the actual switch; called from the BIO completion routine
 630 */
 631static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
 632{
 633        struct file *file = p->file;
 634        struct file *old_file = lo->lo_backing_file;
 635        struct address_space *mapping = file->f_mapping;
 636
 637        mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
 638        lo->lo_backing_file = file;
 639        lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
 640                mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
 641        lo->old_gfp_mask = mapping_gfp_mask(mapping);
 642        mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 643        complete(&p->wait);
 644}
 645
 646
 647/*
 648 * loop_change_fd switched the backing store of a loopback device to
 649 * a new file. This is useful for operating system installers to free up
 650 * the original file and in High Availability environments to switch to
 651 * an alternative location for the content in case of server meltdown.
 652 * This can only work if the loop device is used read-only, and if the
 653 * new backing store is the same size and type as the old backing store.
 654 */
 655static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
 656                       struct block_device *bdev, unsigned int arg)
 657{
 658        struct file     *file, *old_file;
 659        struct inode    *inode;
 660        int             error;
 661
 662        error = -ENXIO;
 663        if (lo->lo_state != Lo_bound)
 664                goto out;
 665
 666        /* the loop device has to be read-only */
 667        error = -EINVAL;
 668        if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 669                goto out;
 670
 671        error = -EBADF;
 672        file = fget(arg);
 673        if (!file)
 674                goto out;
 675
 676        inode = file->f_mapping->host;
 677        old_file = lo->lo_backing_file;
 678
 679        error = -EINVAL;
 680
 681        if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 682                goto out_putf;
 683
 684        /* new backing store needs to support loop (eg splice_read) */
 685        if (!inode->i_fop->splice_read)
 686                goto out_putf;
 687
 688        /* size of the new backing store needs to be the same */
 689        if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 690                goto out_putf;
 691
 692        /* and ... switch */
 693        error = loop_switch(lo, file);
 694        if (error)
 695                goto out_putf;
 696
 697        fput(old_file);
 698        if (max_part > 0)
 699                ioctl_by_bdev(bdev, BLKRRPART, 0);
 700        return 0;
 701
 702 out_putf:
 703        fput(file);
 704 out:
 705        return error;
 706}
 707
 708static inline int is_loop_device(struct file *file)
 709{
 710        struct inode *i = file->f_mapping->host;
 711
 712        return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
 713}
 714
 715static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
 716                       struct block_device *bdev, unsigned int arg)
 717{
 718        struct file     *file, *f;
 719        struct inode    *inode;
 720        struct address_space *mapping;
 721        unsigned lo_blocksize;
 722        int             lo_flags = 0;
 723        int             error;
 724        loff_t          size;
 725
 726        /* This is safe, since we have a reference from open(). */
 727        __module_get(THIS_MODULE);
 728
 729        error = -EBADF;
 730        file = fget(arg);
 731        if (!file)
 732                goto out;
 733
 734        error = -EBUSY;
 735        if (lo->lo_state != Lo_unbound)
 736                goto out_putf;
 737
 738        /* Avoid recursion */
 739        f = file;
 740        while (is_loop_device(f)) {
 741                struct loop_device *l;
 742
 743                if (f->f_mapping->host->i_rdev == lo_file->f_mapping->host->i_rdev)
 744                        goto out_putf;
 745
 746                l = f->f_mapping->host->i_bdev->bd_disk->private_data;
 747                if (l->lo_state == Lo_unbound) {
 748                        error = -EINVAL;
 749                        goto out_putf;
 750                }
 751                f = l->lo_backing_file;
 752        }
 753
 754        mapping = file->f_mapping;
 755        inode = mapping->host;
 756
 757        if (!(file->f_mode & FMODE_WRITE))
 758                lo_flags |= LO_FLAGS_READ_ONLY;
 759
 760        error = -EINVAL;
 761        if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 762                const struct address_space_operations *aops = mapping->a_ops;
 763                /*
 764                 * If we can't read - sorry. If we only can't write - well,
 765                 * it's going to be read-only.
 766                 */
 767                if (!file->f_op->splice_read)
 768                        goto out_putf;
 769                if (aops->prepare_write || aops->write_begin)
 770                        lo_flags |= LO_FLAGS_USE_AOPS;
 771                if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 772                        lo_flags |= LO_FLAGS_READ_ONLY;
 773
 774                lo_blocksize = S_ISBLK(inode->i_mode) ?
 775                        inode->i_bdev->bd_block_size : PAGE_SIZE;
 776
 777                error = 0;
 778        } else {
 779                goto out_putf;
 780        }
 781
 782        size = get_loop_size(lo, file);
 783
 784        if ((loff_t)(sector_t)size != size) {
 785                error = -EFBIG;
 786                goto out_putf;
 787        }
 788
 789        if (!(lo_file->f_mode & FMODE_WRITE))
 790                lo_flags |= LO_FLAGS_READ_ONLY;
 791
 792        set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
 793
 794        lo->lo_blocksize = lo_blocksize;
 795        lo->lo_device = bdev;
 796        lo->lo_flags = lo_flags;
 797        lo->lo_backing_file = file;
 798        lo->transfer = transfer_none;
 799        lo->ioctl = NULL;
 800        lo->lo_sizelimit = 0;
 801        lo->old_gfp_mask = mapping_gfp_mask(mapping);
 802        mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 803
 804        lo->lo_bio = lo->lo_biotail = NULL;
 805
 806        /*
 807         * set queue make_request_fn, and add limits based on lower level
 808         * device
 809         */
 810        blk_queue_make_request(lo->lo_queue, loop_make_request);
 811        lo->lo_queue->queuedata = lo;
 812        lo->lo_queue->unplug_fn = loop_unplug;
 813
 814        set_capacity(lo->lo_disk, size);
 815        bd_set_size(bdev, size << 9);
 816
 817        set_blocksize(bdev, lo_blocksize);
 818
 819        lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
 820                                                lo->lo_number);
 821        if (IS_ERR(lo->lo_thread)) {
 822                error = PTR_ERR(lo->lo_thread);
 823                goto out_clr;
 824        }
 825        lo->lo_state = Lo_bound;
 826        wake_up_process(lo->lo_thread);
 827        if (max_part > 0)
 828                ioctl_by_bdev(bdev, BLKRRPART, 0);
 829        return 0;
 830
 831out_clr:
 832        lo->lo_thread = NULL;
 833        lo->lo_device = NULL;
 834        lo->lo_backing_file = NULL;
 835        lo->lo_flags = 0;
 836        set_capacity(lo->lo_disk, 0);
 837        invalidate_bdev(bdev);
 838        bd_set_size(bdev, 0);
 839        mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
 840        lo->lo_state = Lo_unbound;
 841 out_putf:
 842        fput(file);
 843 out:
 844        /* This is safe: open() is still holding a reference. */
 845        module_put(THIS_MODULE);
 846        return error;
 847}
 848
 849static int
 850loop_release_xfer(struct loop_device *lo)
 851{
 852        int err = 0;
 853        struct loop_func_table *xfer = lo->lo_encryption;
 854
 855        if (xfer) {
 856                if (xfer->release)
 857                        err = xfer->release(lo);
 858                lo->transfer = NULL;
 859                lo->lo_encryption = NULL;
 860                module_put(xfer->owner);
 861        }
 862        return err;
 863}
 864
 865static int
 866loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
 867               const struct loop_info64 *i)
 868{
 869        int err = 0;
 870
 871        if (xfer) {
 872                struct module *owner = xfer->owner;
 873
 874                if (!try_module_get(owner))
 875                        return -EINVAL;
 876                if (xfer->init)
 877                        err = xfer->init(lo, i);
 878                if (err)
 879                        module_put(owner);
 880                else
 881                        lo->lo_encryption = xfer;
 882        }
 883        return err;
 884}
 885
 886static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 887{
 888        struct file *filp = lo->lo_backing_file;
 889        gfp_t gfp = lo->old_gfp_mask;
 890
 891        if (lo->lo_state != Lo_bound)
 892                return -ENXIO;
 893
 894        if (lo->lo_refcnt > 1)  /* we needed one fd for the ioctl */
 895                return -EBUSY;
 896
 897        if (filp == NULL)
 898                return -EINVAL;
 899
 900        spin_lock_irq(&lo->lo_lock);
 901        lo->lo_state = Lo_rundown;
 902        spin_unlock_irq(&lo->lo_lock);
 903
 904        kthread_stop(lo->lo_thread);
 905
 906        lo->lo_backing_file = NULL;
 907
 908        loop_release_xfer(lo);
 909        lo->transfer = NULL;
 910        lo->ioctl = NULL;
 911        lo->lo_device = NULL;
 912        lo->lo_encryption = NULL;
 913        lo->lo_offset = 0;
 914        lo->lo_sizelimit = 0;
 915        lo->lo_encrypt_key_size = 0;
 916        lo->lo_flags = 0;
 917        lo->lo_thread = NULL;
 918        memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
 919        memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
 920        memset(lo->lo_file_name, 0, LO_NAME_SIZE);
 921        invalidate_bdev(bdev);
 922        set_capacity(lo->lo_disk, 0);
 923        bd_set_size(bdev, 0);
 924        mapping_set_gfp_mask(filp->f_mapping, gfp);
 925        lo->lo_state = Lo_unbound;
 926        fput(filp);
 927        /* This is safe: open() is still holding a reference. */
 928        module_put(THIS_MODULE);
 929        if (max_part > 0)
 930                ioctl_by_bdev(bdev, BLKRRPART, 0);
 931        return 0;
 932}
 933
 934static int
 935loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 936{
 937        int err;
 938        struct loop_func_table *xfer;
 939
 940        if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
 941            !capable(CAP_SYS_ADMIN))
 942                return -EPERM;
 943        if (lo->lo_state != Lo_bound)
 944                return -ENXIO;
 945        if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
 946                return -EINVAL;
 947
 948        err = loop_release_xfer(lo);
 949        if (err)
 950                return err;
 951
 952        if (info->lo_encrypt_type) {
 953                unsigned int type = info->lo_encrypt_type;
 954
 955                if (type >= MAX_LO_CRYPT)
 956                        return -EINVAL;
 957                xfer = xfer_funcs[type];
 958                if (xfer == NULL)
 959                        return -EINVAL;
 960        } else
 961                xfer = NULL;
 962
 963        err = loop_init_xfer(lo, xfer, info);
 964        if (err)
 965                return err;
 966
 967        if (lo->lo_offset != info->lo_offset ||
 968            lo->lo_sizelimit != info->lo_sizelimit) {
 969                lo->lo_offset = info->lo_offset;
 970                lo->lo_sizelimit = info->lo_sizelimit;
 971                if (figure_loop_size(lo))
 972                        return -EFBIG;
 973        }
 974
 975        memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
 976        memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
 977        lo->lo_file_name[LO_NAME_SIZE-1] = 0;
 978        lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
 979
 980        if (!xfer)
 981                xfer = &none_funcs;
 982        lo->transfer = xfer->transfer;
 983        lo->ioctl = xfer->ioctl;
 984
 985        if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
 986             (info->lo_flags & LO_FLAGS_AUTOCLEAR))
 987                lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
 988
 989        lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
 990        lo->lo_init[0] = info->lo_init[0];
 991        lo->lo_init[1] = info->lo_init[1];
 992        if (info->lo_encrypt_key_size) {
 993                memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
 994                       info->lo_encrypt_key_size);
 995                lo->lo_key_owner = current->uid;
 996        }       
 997
 998        return 0;
 999}
1000
1001static int
1002loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1003{
1004        struct file *file = lo->lo_backing_file;
1005        struct kstat stat;
1006        int error;
1007
1008        if (lo->lo_state != Lo_bound)
1009                return -ENXIO;
1010        error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
1011        if (error)
1012                return error;
1013        memset(info, 0, sizeof(*info));
1014        info->lo_number = lo->lo_number;
1015        info->lo_device = huge_encode_dev(stat.dev);
1016        info->lo_inode = stat.ino;
1017        info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
1018        info->lo_offset = lo->lo_offset;
1019        info->lo_sizelimit = lo->lo_sizelimit;
1020        info->lo_flags = lo->lo_flags;
1021        memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
1022        memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1023        info->lo_encrypt_type =
1024                lo->lo_encryption ? lo->lo_encryption->number : 0;
1025        if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1026                info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1027                memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1028                       lo->lo_encrypt_key_size);
1029        }
1030        return 0;
1031}
1032
1033static void
1034loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1035{
1036        memset(info64, 0, sizeof(*info64));
1037        info64->lo_number = info->lo_number;
1038        info64->lo_device = info->lo_device;
1039        info64->lo_inode = info->lo_inode;
1040        info64->lo_rdevice = info->lo_rdevice;
1041        info64->lo_offset = info->lo_offset;
1042        info64->lo_sizelimit = 0;
1043        info64->lo_encrypt_type = info->lo_encrypt_type;
1044        info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1045        info64->lo_flags = info->lo_flags;
1046        info64->lo_init[0] = info->lo_init[0];
1047        info64->lo_init[1] = info->lo_init[1];
1048        if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1049                memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
1050        else
1051                memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1052        memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1053}
1054
1055static int
1056loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
1057{
1058        memset(info, 0, sizeof(*info));
1059        info->lo_number = info64->lo_number;
1060        info->lo_device = info64->lo_device;
1061        info->lo_inode = info64->lo_inode;
1062        info->lo_rdevice = info64->lo_rdevice;
1063        info->lo_offset = info64->lo_offset;
1064        info->lo_encrypt_type = info64->lo_encrypt_type;
1065        info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1066        info->lo_flags = info64->lo_flags;
1067        info->lo_init[0] = info64->lo_init[0];
1068        info->lo_init[1] = info64->lo_init[1];
1069        if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1070                memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1071        else
1072                memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1073        memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1074
1075        /* error in case values were truncated */
1076        if (info->lo_device != info64->lo_device ||
1077            info->lo_rdevice != info64->lo_rdevice ||
1078            info->lo_inode != info64->lo_inode ||
1079            info->lo_offset != info64->lo_offset)
1080                return -EOVERFLOW;
1081
1082        return 0;
1083}
1084
1085static int
1086loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
1087{
1088        struct loop_info info;
1089        struct loop_info64 info64;
1090
1091        if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1092                return -EFAULT;
1093        loop_info64_from_old(&info, &info64);
1094        return loop_set_status(lo, &info64);
1095}
1096
1097static int
1098loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
1099{
1100        struct loop_info64 info64;
1101
1102        if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1103                return -EFAULT;
1104        return loop_set_status(lo, &info64);
1105}
1106
1107static int
1108loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
1109        struct loop_info info;
1110        struct loop_info64 info64;
1111        int err = 0;
1112
1113        if (!arg)
1114                err = -EINVAL;
1115        if (!err)
1116                err = loop_get_status(lo, &info64);
1117        if (!err)
1118                err = loop_info64_to_old(&info64, &info);
1119        if (!err && copy_to_user(arg, &info, sizeof(info)))
1120                err = -EFAULT;
1121
1122        return err;
1123}
1124
1125static int
1126loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
1127        struct loop_info64 info64;
1128        int err = 0;
1129
1130        if (!arg)
1131                err = -EINVAL;
1132        if (!err)
1133                err = loop_get_status(lo, &info64);
1134        if (!err && copy_to_user(arg, &info64, sizeof(info64)))
1135                err = -EFAULT;
1136
1137        return err;
1138}
1139
1140static int lo_ioctl(struct inode * inode, struct file * file,
1141        unsigned int cmd, unsigned long arg)
1142{
1143        struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
1144        int err;
1145
1146        mutex_lock(&lo->lo_ctl_mutex);
1147        switch (cmd) {
1148        case LOOP_SET_FD:
1149                err = loop_set_fd(lo, file, inode->i_bdev, arg);
1150                break;
1151        case LOOP_CHANGE_FD:
1152                err = loop_change_fd(lo, file, inode->i_bdev, arg);
1153                break;
1154        case LOOP_CLR_FD:
1155                err = loop_clr_fd(lo, inode->i_bdev);
1156                break;
1157        case LOOP_SET_STATUS:
1158                err = loop_set_status_old(lo, (struct loop_info __user *) arg);
1159                break;
1160        case LOOP_GET_STATUS:
1161                err = loop_get_status_old(lo, (struct loop_info __user *) arg);
1162                break;
1163        case LOOP_SET_STATUS64:
1164                err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
1165                break;
1166        case LOOP_GET_STATUS64:
1167                err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
1168                break;
1169        default:
1170                err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
1171        }
1172        mutex_unlock(&lo->lo_ctl_mutex);
1173        return err;
1174}
1175
1176#ifdef CONFIG_COMPAT
1177struct compat_loop_info {
1178        compat_int_t    lo_number;      /* ioctl r/o */
1179        compat_dev_t    lo_device;      /* ioctl r/o */
1180        compat_ulong_t  lo_inode;       /* ioctl r/o */
1181        compat_dev_t    lo_rdevice;     /* ioctl r/o */
1182        compat_int_t    lo_offset;
1183        compat_int_t    lo_encrypt_type;
1184        compat_int_t    lo_encrypt_key_size;    /* ioctl w/o */
1185        compat_int_t    lo_flags;       /* ioctl r/o */
1186        char            lo_name[LO_NAME_SIZE];
1187        unsigned char   lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
1188        compat_ulong_t  lo_init[2];
1189        char            reserved[4];
1190};
1191
1192/*
1193 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
1194 * - noinlined to reduce stack space usage in main part of driver
1195 */
1196static noinline int
1197loop_info64_from_compat(const struct compat_loop_info __user *arg,
1198                        struct loop_info64 *info64)
1199{
1200        struct compat_loop_info info;
1201
1202        if (copy_from_user(&info, arg, sizeof(info)))
1203                return -EFAULT;
1204
1205        memset(info64, 0, sizeof(*info64));
1206        info64->lo_number = info.lo_number;
1207        info64->lo_device = info.lo_device;
1208        info64->lo_inode = info.lo_inode;
1209        info64->lo_rdevice = info.lo_rdevice;
1210        info64->lo_offset = info.lo_offset;
1211        info64->lo_sizelimit = 0;
1212        info64->lo_encrypt_type = info.lo_encrypt_type;
1213        info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
1214        info64->lo_flags = info.lo_flags;
1215        info64->lo_init[0] = info.lo_init[0];
1216        info64->lo_init[1] = info.lo_init[1];
1217        if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1218                memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
1219        else
1220                memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
1221        memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
1222        return 0;
1223}
1224
1225/*
1226 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
1227 * - noinlined to reduce stack space usage in main part of driver
1228 */
1229static noinline int
1230loop_info64_to_compat(const struct loop_info64 *info64,
1231                      struct compat_loop_info __user *arg)
1232{
1233        struct compat_loop_info info;
1234
1235        memset(&info, 0, sizeof(info));
1236        info.lo_number = info64->lo_number;
1237        info.lo_device = info64->lo_device;
1238        info.lo_inode = info64->lo_inode;
1239        info.lo_rdevice = info64->lo_rdevice;
1240        info.lo_offset = info64->lo_offset;
1241        info.lo_encrypt_type = info64->lo_encrypt_type;
1242        info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
1243        info.lo_flags = info64->lo_flags;
1244        info.lo_init[0] = info64->lo_init[0];
1245        info.lo_init[1] = info64->lo_init[1];
1246        if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1247                memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1248        else
1249                memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
1250        memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1251
1252        /* error in case values were truncated */
1253        if (info.lo_device != info64->lo_device ||
1254            info.lo_rdevice != info64->lo_rdevice ||
1255            info.lo_inode != info64->lo_inode ||
1256            info.lo_offset != info64->lo_offset ||
1257            info.lo_init[0] != info64->lo_init[0] ||
1258            info.lo_init[1] != info64->lo_init[1])
1259                return -EOVERFLOW;
1260
1261        if (copy_to_user(arg, &info, sizeof(info)))
1262                return -EFAULT;
1263        return 0;
1264}
1265
1266static int
1267loop_set_status_compat(struct loop_device *lo,
1268                       const struct compat_loop_info __user *arg)
1269{
1270        struct loop_info64 info64;
1271        int ret;
1272
1273        ret = loop_info64_from_compat(arg, &info64);
1274        if (ret < 0)
1275                return ret;
1276        return loop_set_status(lo, &info64);
1277}
1278
1279static int
1280loop_get_status_compat(struct loop_device *lo,
1281                       struct compat_loop_info __user *arg)
1282{
1283        struct loop_info64 info64;
1284        int err = 0;
1285
1286        if (!arg)
1287                err = -EINVAL;
1288        if (!err)
1289                err = loop_get_status(lo, &info64);
1290        if (!err)
1291                err = loop_info64_to_compat(&info64, arg);
1292        return err;
1293}
1294
1295static long lo_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1296{
1297        struct inode *inode = file->f_path.dentry->d_inode;
1298        struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
1299        int err;
1300
1301        switch(cmd) {
1302        case LOOP_SET_STATUS:
1303                mutex_lock(&lo->lo_ctl_mutex);
1304                err = loop_set_status_compat(
1305                        lo, (const struct compat_loop_info __user *) arg);
1306                mutex_unlock(&lo->lo_ctl_mutex);
1307                break;
1308        case LOOP_GET_STATUS:
1309                mutex_lock(&lo->lo_ctl_mutex);
1310                err = loop_get_status_compat(
1311                        lo, (struct compat_loop_info __user *) arg);
1312                mutex_unlock(&lo->lo_ctl_mutex);
1313                break;
1314        case LOOP_CLR_FD:
1315        case LOOP_GET_STATUS64:
1316        case LOOP_SET_STATUS64:
1317                arg = (unsigned long) compat_ptr(arg);
1318        case LOOP_SET_FD:
1319        case LOOP_CHANGE_FD:
1320                err = lo_ioctl(inode, file, cmd, arg);
1321                break;
1322        default:
1323                err = -ENOIOCTLCMD;
1324                break;
1325        }
1326        return err;
1327}
1328#endif
1329
1330static int lo_open(struct inode *inode, struct file *file)
1331{
1332        struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
1333
1334        mutex_lock(&lo->lo_ctl_mutex);
1335        lo->lo_refcnt++;
1336        mutex_unlock(&lo->lo_ctl_mutex);
1337
1338        return 0;
1339}
1340
1341static int lo_release(struct inode *inode, struct file *file)
1342{
1343        struct loop_device *lo = inode->i_bdev->bd_disk->private_data;
1344
1345        mutex_lock(&lo->lo_ctl_mutex);
1346        --lo->lo_refcnt;
1347
1348        if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) && !lo->lo_refcnt)
1349                loop_clr_fd(lo, inode->i_bdev);
1350
1351        mutex_unlock(&lo->lo_ctl_mutex);
1352
1353        return 0;
1354}
1355
1356static struct block_device_operations lo_fops = {
1357        .owner =        THIS_MODULE,
1358        .open =         lo_open,
1359        .release =      lo_release,
1360        .ioctl =        lo_ioctl,
1361#ifdef CONFIG_COMPAT
1362        .compat_ioctl = lo_compat_ioctl,
1363#endif
1364};
1365
1366/*
1367 * And now the modules code and kernel interface.
1368 */
1369static int max_loop;
1370module_param(max_loop, int, 0);
1371MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
1372module_param(max_part, int, 0);
1373MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
1374MODULE_LICENSE("GPL");
1375MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
1376
1377int loop_register_transfer(struct loop_func_table *funcs)
1378{
1379        unsigned int n = funcs->number;
1380
1381        if (n >= MAX_LO_CRYPT || xfer_funcs[n])
1382                return -EINVAL;
1383        xfer_funcs[n] = funcs;
1384        return 0;
1385}
1386
1387int loop_unregister_transfer(int number)
1388{
1389        unsigned int n = number;
1390        struct loop_device *lo;
1391        struct loop_func_table *xfer;
1392
1393        if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
1394                return -EINVAL;
1395
1396        xfer_funcs[n] = NULL;
1397
1398        list_for_each_entry(lo, &loop_devices, lo_list) {
1399                mutex_lock(&lo->lo_ctl_mutex);
1400
1401                if (lo->lo_encryption == xfer)
1402                        loop_release_xfer(lo);
1403
1404                mutex_unlock(&lo->lo_ctl_mutex);
1405        }
1406
1407        return 0;
1408}
1409
1410EXPORT_SYMBOL(loop_register_transfer);
1411EXPORT_SYMBOL(loop_unregister_transfer);
1412
1413static struct loop_device *loop_alloc(int i)
1414{
1415        struct loop_device *lo;
1416        struct gendisk *disk;
1417
1418        lo = kzalloc(sizeof(*lo), GFP_KERNEL);
1419        if (!lo)
1420                goto out;
1421
1422        lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1423        if (!lo->lo_queue)
1424                goto out_free_dev;
1425
1426        disk = lo->lo_disk = alloc_disk(1 << part_shift);
1427        if (!disk)
1428                goto out_free_queue;
1429
1430        mutex_init(&lo->lo_ctl_mutex);
1431        lo->lo_number           = i;
1432        lo->lo_thread           = NULL;
1433        init_waitqueue_head(&lo->lo_event);
1434        spin_lock_init(&lo->lo_lock);
1435        disk->major             = LOOP_MAJOR;
1436        disk->first_minor       = i << part_shift;
1437        disk->fops              = &lo_fops;
1438        disk->private_data      = lo;
1439        disk->queue             = lo->lo_queue;
1440        sprintf(disk->disk_name, "loop%d", i);
1441        return lo;
1442
1443out_free_queue:
1444        blk_cleanup_queue(lo->lo_queue);
1445out_free_dev:
1446        kfree(lo);
1447out:
1448        return NULL;
1449}
1450
1451static void loop_free(struct loop_device *lo)
1452{
1453        blk_cleanup_queue(lo->lo_queue);
1454        put_disk(lo->lo_disk);
1455        list_del(&lo->lo_list);
1456        kfree(lo);
1457}
1458
1459static struct loop_device *loop_init_one(int i)
1460{
1461        struct loop_device *lo;
1462
1463        list_for_each_entry(lo, &loop_devices, lo_list) {
1464                if (lo->lo_number == i)
1465                        return lo;
1466        }
1467
1468        lo = loop_alloc(i);
1469        if (lo) {
1470                add_disk(lo->lo_disk);
1471                list_add_tail(&lo->lo_list, &loop_devices);
1472        }
1473        return lo;
1474}
1475
1476static void loop_del_one(struct loop_device *lo)
1477{
1478        del_gendisk(lo->lo_disk);
1479        loop_free(lo);
1480}
1481
1482static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1483{
1484        struct loop_device *lo;
1485        struct kobject *kobj;
1486
1487        mutex_lock(&loop_devices_mutex);
1488        lo = loop_init_one(dev & MINORMASK);
1489        kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
1490        mutex_unlock(&loop_devices_mutex);
1491
1492        *part = 0;
1493        return kobj;
1494}
1495
1496static int __init loop_init(void)
1497{
1498        int i, nr;
1499        unsigned long range;
1500        struct loop_device *lo, *next;
1501
1502        /*
1503         * loop module now has a feature to instantiate underlying device
1504         * structure on-demand, provided that there is an access dev node.
1505         * However, this will not work well with user space tool that doesn't
1506         * know about such "feature".  In order to not break any existing
1507         * tool, we do the following:
1508         *
1509         * (1) if max_loop is specified, create that many upfront, and this
1510         *     also becomes a hard limit.
1511         * (2) if max_loop is not specified, create 8 loop device on module
1512         *     load, user can further extend loop device by create dev node
1513         *     themselves and have kernel automatically instantiate actual
1514         *     device on-demand.
1515         */
1516
1517        part_shift = 0;
1518        if (max_part > 0)
1519                part_shift = fls(max_part);
1520
1521        if (max_loop > 1UL << (MINORBITS - part_shift))
1522                return -EINVAL;
1523
1524        if (max_loop) {
1525                nr = max_loop;
1526                range = max_loop;
1527        } else {
1528                nr = 8;
1529                range = 1UL << (MINORBITS - part_shift);
1530        }
1531
1532        if (register_blkdev(LOOP_MAJOR, "loop"))
1533                return -EIO;
1534
1535        for (i = 0; i < nr; i++) {
1536                lo = loop_alloc(i);
1537                if (!lo)
1538                        goto Enomem;
1539                list_add_tail(&lo->lo_list, &loop_devices);
1540        }
1541
1542        /* point of no return */
1543
1544        list_for_each_entry(lo, &loop_devices, lo_list)
1545                add_disk(lo->lo_disk);
1546
1547        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1548                                  THIS_MODULE, loop_probe, NULL, NULL);
1549
1550        printk(KERN_INFO "loop: module loaded\n");
1551        return 0;
1552
1553Enomem:
1554        printk(KERN_INFO "loop: out of memory\n");
1555
1556        list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1557                loop_free(lo);
1558
1559        unregister_blkdev(LOOP_MAJOR, "loop");
1560        return -ENOMEM;
1561}
1562
1563static void __exit loop_exit(void)
1564{
1565        unsigned long range;
1566        struct loop_device *lo, *next;
1567
1568        range = max_loop ? max_loop :  1UL << (MINORBITS - part_shift);
1569
1570        list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1571                loop_del_one(lo);
1572
1573        blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
1574        unregister_blkdev(LOOP_MAJOR, "loop");
1575}
1576
1577module_init(loop_init);
1578module_exit(loop_exit);
1579
1580#ifndef MODULE
1581static int __init max_loop_setup(char *str)
1582{
1583        max_loop = simple_strtol(str, NULL, 0);
1584        return 1;
1585}
1586
1587__setup("max_loop=", max_loop_setup);
1588#endif
1589