linux-bk/drivers/md/dm-snap.c
<<
>>
Prefs
   1/*
   2 * dm-snapshot.c
   3 *
   4 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
   5 *
   6 * This file is released under the GPL.
   7 */
   8
   9#include <linux/blkdev.h>
  10#include <linux/config.h>
  11#include <linux/ctype.h>
  12#include <linux/device-mapper.h>
  13#include <linux/fs.h>
  14#include <linux/init.h>
  15#include <linux/kdev_t.h>
  16#include <linux/list.h>
  17#include <linux/mempool.h>
  18#include <linux/module.h>
  19#include <linux/slab.h>
  20#include <linux/vmalloc.h>
  21
  22#include "dm-snap.h"
  23#include "dm-bio-list.h"
  24#include "kcopyd.h"
  25
/*
 * The percentage increment we will wake up users at.
 * NOTE(review): not referenced in the visible part of this file.
 */
#define WAKE_UP_PERCENT 5

/*
 * kcopyd priority of snapshot operations.
 * NOTE(review): not referenced in the visible part of this file; the
 * kcopyd_copy() call in start_copy() does not pass it.
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Each snapshot reserves this many pages for io.  Passed to
 * kcopyd_client_create() by snapshot_ctr().
 */
#define SNAPSHOT_PAGES 256
  40
/*
 * An exception whose chunk copy has not completed yet.  Lives in the
 * snapshot's 'pending' exception table (hashed through 'e') until
 * pending_complete() runs.
 */
struct pending_exception {
        /* Embedded exception; container_of() recovers the pending_exception */
        struct exception e;

        /*
         * Origin buffers waiting for this to complete are held
         * in a bio list
         */
        struct bio_list origin_bios;

        /*
         * Writes to the snapshot itself that are waiting for the copy;
         * queued by snapshot_map(), flushed or errored by
         * pending_complete().
         */
        struct bio_list snapshot_bios;

        /*
         * Other pending_exceptions that are processing this
         * chunk.  When this list is empty, we know we can
         * complete the origins.
         */
        struct list_head siblings;

        /* Pointer back to snapshot context */
        struct dm_snapshot *snap;

        /*
         * 1 indicates the exception has already been sent to
         * kcopyd.
         */
        int started;
};
  67
/*
 * Allocators used by this file:
 *  - exception_cache backs alloc_exception()/free_exception();
 *  - pending_pool backs alloc_pending_exception()/free_pending_exception();
 *  - pending_cache is handed to exit_exception_table() when the pending
 *    table is torn down.
 * NOTE(review): their creation is not in the visible part of the file.
 */
static kmem_cache_t *exception_cache;
static kmem_cache_t *pending_cache;
static mempool_t *pending_pool;
  75
/*
 * One of these per registered origin, held in the _origins hash.
 * Allocated by register_snapshot() for the first snapshot of a device and
 * freed by unregister_snapshot() when the last snapshot goes away.
 */
struct origin {
        /* The origin device */
        struct block_device *bdev;

        /* Link in the _origins bucket selected by origin_hash(bdev) */
        struct list_head hash_list;

        /* List of snapshots for this origin */
        struct list_head snapshots;
};
  88
/*
 * Size of the hash table for origin volumes. If we make this
 * the size of the minors list then it should be nearly perfect.
 *
 * origin_hash() selects a bucket with 'bd_dev & ORIGIN_MASK', so
 * ORIGIN_MASK has to stay equal to ORIGIN_HASH_SIZE - 1.
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
/* Bucket array, allocated by init_origin_hash() */
static struct list_head *_origins;
/*
 * Held for write while snapshots are registered/unregistered and for
 * read while do_origin() walks an origin's snapshot list.
 */
static struct rw_semaphore _origins_lock;
  97
  98static int init_origin_hash(void)
  99{
 100        int i;
 101
 102        _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
 103                           GFP_KERNEL);
 104        if (!_origins) {
 105                DMERR("Device mapper: Snapshot: unable to allocate memory");
 106                return -ENOMEM;
 107        }
 108
 109        for (i = 0; i < ORIGIN_HASH_SIZE; i++)
 110                INIT_LIST_HEAD(_origins + i);
 111        init_rwsem(&_origins_lock);
 112
 113        return 0;
 114}
 115
 116static void exit_origin_hash(void)
 117{
 118        kfree(_origins);
 119}
 120
 121static inline unsigned int origin_hash(struct block_device *bdev)
 122{
 123        return bdev->bd_dev & ORIGIN_MASK;
 124}
 125
 126static struct origin *__lookup_origin(struct block_device *origin)
 127{
 128        struct list_head *ol;
 129        struct origin *o;
 130
 131        ol = &_origins[origin_hash(origin)];
 132        list_for_each_entry (o, ol, hash_list)
 133                if (bdev_equal(o->bdev, origin))
 134                        return o;
 135
 136        return NULL;
 137}
 138
 139static void __insert_origin(struct origin *o)
 140{
 141        struct list_head *sl = &_origins[origin_hash(o->bdev)];
 142        list_add_tail(&o->hash_list, sl);
 143}
 144
 145/*
 146 * Make a note of the snapshot and its origin so we can look it
 147 * up when the origin has a write on it.
 148 */
 149static int register_snapshot(struct dm_snapshot *snap)
 150{
 151        struct origin *o;
 152        struct block_device *bdev = snap->origin->bdev;
 153
 154        down_write(&_origins_lock);
 155        o = __lookup_origin(bdev);
 156
 157        if (!o) {
 158                /* New origin */
 159                o = kmalloc(sizeof(*o), GFP_KERNEL);
 160                if (!o) {
 161                        up_write(&_origins_lock);
 162                        return -ENOMEM;
 163                }
 164
 165                /* Initialise the struct */
 166                INIT_LIST_HEAD(&o->snapshots);
 167                o->bdev = bdev;
 168
 169                __insert_origin(o);
 170        }
 171
 172        list_add_tail(&snap->list, &o->snapshots);
 173
 174        up_write(&_origins_lock);
 175        return 0;
 176}
 177
 178static void unregister_snapshot(struct dm_snapshot *s)
 179{
 180        struct origin *o;
 181
 182        down_write(&_origins_lock);
 183        o = __lookup_origin(s->origin->bdev);
 184
 185        list_del(&s->list);
 186        if (list_empty(&o->snapshots)) {
 187                list_del(&o->hash_list);
 188                kfree(o);
 189        }
 190
 191        up_write(&_origins_lock);
 192}
 193
 194/*
 195 * Implementation of the exception hash tables.
 196 */
 197static int init_exception_table(struct exception_table *et, uint32_t size)
 198{
 199        unsigned int i;
 200
 201        et->hash_mask = size - 1;
 202        et->table = dm_vcalloc(size, sizeof(struct list_head));
 203        if (!et->table)
 204                return -ENOMEM;
 205
 206        for (i = 0; i < size; i++)
 207                INIT_LIST_HEAD(et->table + i);
 208
 209        return 0;
 210}
 211
 212static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
 213{
 214        struct list_head *slot;
 215        struct exception *ex, *next;
 216        int i, size;
 217
 218        size = et->hash_mask + 1;
 219        for (i = 0; i < size; i++) {
 220                slot = et->table + i;
 221
 222                list_for_each_entry_safe (ex, next, slot, hash_list)
 223                        kmem_cache_free(mem, ex);
 224        }
 225
 226        vfree(et->table);
 227}
 228
 229static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
 230{
 231        return chunk & et->hash_mask;
 232}
 233
 234static void insert_exception(struct exception_table *eh, struct exception *e)
 235{
 236        struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
 237        list_add(&e->hash_list, l);
 238}
 239
 240static inline void remove_exception(struct exception *e)
 241{
 242        list_del(&e->hash_list);
 243}
 244
 245/*
 246 * Return the exception data for a sector, or NULL if not
 247 * remapped.
 248 */
 249static struct exception *lookup_exception(struct exception_table *et,
 250                                          chunk_t chunk)
 251{
 252        struct list_head *slot;
 253        struct exception *e;
 254
 255        slot = &et->table[exception_hash(et, chunk)];
 256        list_for_each_entry (e, slot, hash_list)
 257                if (e->old_chunk == chunk)
 258                        return e;
 259
 260        return NULL;
 261}
 262
 263static inline struct exception *alloc_exception(void)
 264{
 265        struct exception *e;
 266
 267        e = kmem_cache_alloc(exception_cache, GFP_NOIO);
 268        if (!e)
 269                e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
 270
 271        return e;
 272}
 273
 274static inline void free_exception(struct exception *e)
 275{
 276        kmem_cache_free(exception_cache, e);
 277}
 278
 279static inline struct pending_exception *alloc_pending_exception(void)
 280{
 281        return mempool_alloc(pending_pool, GFP_NOIO);
 282}
 283
 284static inline void free_pending_exception(struct pending_exception *pe)
 285{
 286        mempool_free(pe, pending_pool);
 287}
 288
 289int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
 290{
 291        struct exception *e;
 292
 293        e = alloc_exception();
 294        if (!e)
 295                return -ENOMEM;
 296
 297        e->old_chunk = old;
 298        e->new_chunk = new;
 299        insert_exception(&s->complete, e);
 300        return 0;
 301}
 302
 303/*
 304 * Hard coded magic.
 305 */
 306static int calc_max_buckets(void)
 307{
 308        /* use a fixed size of 2MB */
 309        unsigned long mem = 2 * 1024 * 1024;
 310        mem /= sizeof(struct list_head);
 311
 312        return mem;
 313}
 314
 315/*
 316 * Rounds a number down to a power of 2.
 317 */
 318static inline uint32_t round_down(uint32_t n)
 319{
 320        while (n & (n - 1))
 321                n &= (n - 1);
 322        return n;
 323}
 324
 325/*
 326 * Allocate room for a suitable hash table.
 327 */
 328static int init_hash_tables(struct dm_snapshot *s)
 329{
 330        sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
 331
 332        /*
 333         * Calculate based on the size of the original volume or
 334         * the COW volume...
 335         */
 336        cow_dev_size = get_dev_size(s->cow->bdev);
 337        origin_dev_size = get_dev_size(s->origin->bdev);
 338        max_buckets = calc_max_buckets();
 339
 340        hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
 341        hash_size = min(hash_size, max_buckets);
 342
 343        /* Round it down to a power of 2 */
 344        hash_size = round_down(hash_size);
 345        if (init_exception_table(&s->complete, hash_size))
 346                return -ENOMEM;
 347
 348        /*
 349         * Allocate hash table for in-flight exceptions
 350         * Make this smaller than the real hash table
 351         */
 352        hash_size >>= 3;
 353        if (hash_size < 64)
 354                hash_size = 64;
 355
 356        if (init_exception_table(&s->pending, hash_size)) {
 357                exit_exception_table(&s->complete, exception_cache);
 358                return -ENOMEM;
 359        }
 360
 361        return 0;
 362}
 363
 364/*
 365 * Round a number up to the nearest 'size' boundary.  size must
 366 * be a power of 2.
 367 */
 368static inline ulong round_up(ulong n, ulong size)
 369{
 370        size--;
 371        return (n + size) & ~size;
 372}
 373
 374/*
 375 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
 376 */
 377static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 378{
 379        struct dm_snapshot *s;
 380        unsigned long chunk_size;
 381        int r = -EINVAL;
 382        char persistent;
 383        char *origin_path;
 384        char *cow_path;
 385        char *value;
 386        int blocksize;
 387
 388        if (argc < 4) {
 389                ti->error = "dm-snapshot: requires exactly 4 arguments";
 390                r = -EINVAL;
 391                goto bad1;
 392        }
 393
 394        origin_path = argv[0];
 395        cow_path = argv[1];
 396        persistent = toupper(*argv[2]);
 397
 398        if (persistent != 'P' && persistent != 'N') {
 399                ti->error = "Persistent flag is not P or N";
 400                r = -EINVAL;
 401                goto bad1;
 402        }
 403
 404        chunk_size = simple_strtoul(argv[3], &value, 10);
 405        if (chunk_size == 0 || value == NULL) {
 406                ti->error = "Invalid chunk size";
 407                r = -EINVAL;
 408                goto bad1;
 409        }
 410
 411        s = kmalloc(sizeof(*s), GFP_KERNEL);
 412        if (s == NULL) {
 413                ti->error = "Cannot allocate snapshot context private "
 414                    "structure";
 415                r = -ENOMEM;
 416                goto bad1;
 417        }
 418
 419        r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
 420        if (r) {
 421                ti->error = "Cannot get origin device";
 422                goto bad2;
 423        }
 424
 425        r = dm_get_device(ti, cow_path, 0, 0,
 426                          FMODE_READ | FMODE_WRITE, &s->cow);
 427        if (r) {
 428                dm_put_device(ti, s->origin);
 429                ti->error = "Cannot get COW device";
 430                goto bad2;
 431        }
 432
 433        /*
 434         * Chunk size must be multiple of page size.  Silently
 435         * round up if it's not.
 436         */
 437        chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
 438
 439        /* Validate the chunk size against the device block size */
 440        blocksize = s->cow->bdev->bd_disk->queue->hardsect_size;
 441        if (chunk_size % (blocksize >> 9)) {
 442                ti->error = "Chunk size is not a multiple of device blocksize";
 443                r = -EINVAL;
 444                goto bad3;
 445        }
 446
 447        /* Check chunk_size is a power of 2 */
 448        if (chunk_size & (chunk_size - 1)) {
 449                ti->error = "Chunk size is not a power of 2";
 450                r = -EINVAL;
 451                goto bad3;
 452        }
 453
 454        s->chunk_size = chunk_size;
 455        s->chunk_mask = chunk_size - 1;
 456        s->type = persistent;
 457        s->chunk_shift = ffs(chunk_size) - 1;
 458
 459        s->valid = 1;
 460        s->have_metadata = 0;
 461        s->last_percent = 0;
 462        init_rwsem(&s->lock);
 463        s->table = ti->table;
 464
 465        /* Allocate hash table for COW data */
 466        if (init_hash_tables(s)) {
 467                ti->error = "Unable to allocate hash table space";
 468                r = -ENOMEM;
 469                goto bad3;
 470        }
 471
 472        /*
 473         * Check the persistent flag - done here because we need the iobuf
 474         * to check the LV header
 475         */
 476        s->store.snap = s;
 477
 478        if (persistent == 'P')
 479                r = dm_create_persistent(&s->store, chunk_size);
 480        else
 481                r = dm_create_transient(&s->store, s, blocksize);
 482
 483        if (r) {
 484                ti->error = "Couldn't create exception store";
 485                r = -EINVAL;
 486                goto bad4;
 487        }
 488
 489        r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
 490        if (r) {
 491                ti->error = "Could not create kcopyd client";
 492                goto bad5;
 493        }
 494
 495        /* Add snapshot to the list of snapshots for this origin */
 496        if (register_snapshot(s)) {
 497                r = -EINVAL;
 498                ti->error = "Cannot register snapshot origin";
 499                goto bad6;
 500        }
 501
 502        ti->private = s;
 503        ti->split_io = chunk_size;
 504
 505        return 0;
 506
 507 bad6:
 508        kcopyd_client_destroy(s->kcopyd_client);
 509
 510 bad5:
 511        s->store.destroy(&s->store);
 512
 513 bad4:
 514        exit_exception_table(&s->pending, pending_cache);
 515        exit_exception_table(&s->complete, exception_cache);
 516
 517 bad3:
 518        dm_put_device(ti, s->cow);
 519        dm_put_device(ti, s->origin);
 520
 521 bad2:
 522        kfree(s);
 523
 524 bad1:
 525        return r;
 526}
 527
 528static void snapshot_dtr(struct dm_target *ti)
 529{
 530        struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 531
 532        unregister_snapshot(s);
 533
 534        exit_exception_table(&s->pending, pending_cache);
 535        exit_exception_table(&s->complete, exception_cache);
 536
 537        /* Deallocate memory used */
 538        s->store.destroy(&s->store);
 539
 540        dm_put_device(ti, s->origin);
 541        dm_put_device(ti, s->cow);
 542        kcopyd_client_destroy(s->kcopyd_client);
 543        kfree(s);
 544}
 545
 546/*
 547 * Flush a list of buffers.
 548 */
 549static void flush_bios(struct bio *bio)
 550{
 551        struct bio *n;
 552
 553        while (bio) {
 554                n = bio->bi_next;
 555                bio->bi_next = NULL;
 556                generic_make_request(bio);
 557                bio = n;
 558        }
 559}
 560
 561/*
 562 * Error a list of buffers.
 563 */
 564static void error_bios(struct bio *bio)
 565{
 566        struct bio *n;
 567
 568        while (bio) {
 569                n = bio->bi_next;
 570                bio->bi_next = NULL;
 571                bio_io_error(bio, bio->bi_size);
 572                bio = n;
 573        }
 574}
 575
 576static struct bio *__flush_bios(struct pending_exception *pe)
 577{
 578        struct pending_exception *sibling;
 579
 580        if (list_empty(&pe->siblings))
 581                return bio_list_get(&pe->origin_bios);
 582
 583        sibling = list_entry(pe->siblings.next,
 584                             struct pending_exception, siblings);
 585
 586        list_del(&pe->siblings);
 587
 588        /* This is fine as long as kcopyd is single-threaded. If kcopyd
 589         * becomes multi-threaded, we'll need some locking here.
 590         */
 591        bio_list_merge(&sibling->origin_bios, &pe->origin_bios);
 592
 593        return NULL;
 594}
 595
 596static void pending_complete(struct pending_exception *pe, int success)
 597{
 598        struct exception *e;
 599        struct dm_snapshot *s = pe->snap;
 600        struct bio *flush = NULL;
 601
 602        if (success) {
 603                e = alloc_exception();
 604                if (!e) {
 605                        DMWARN("Unable to allocate exception.");
 606                        down_write(&s->lock);
 607                        s->store.drop_snapshot(&s->store);
 608                        s->valid = 0;
 609                        flush = __flush_bios(pe);
 610                        up_write(&s->lock);
 611
 612                        error_bios(bio_list_get(&pe->snapshot_bios));
 613                        goto out;
 614                }
 615                *e = pe->e;
 616
 617                /*
 618                 * Add a proper exception, and remove the
 619                 * in-flight exception from the list.
 620                 */
 621                down_write(&s->lock);
 622                insert_exception(&s->complete, e);
 623                remove_exception(&pe->e);
 624                flush = __flush_bios(pe);
 625
 626                /* Submit any pending write bios */
 627                up_write(&s->lock);
 628
 629                flush_bios(bio_list_get(&pe->snapshot_bios));
 630        } else {
 631                /* Read/write error - snapshot is unusable */
 632                down_write(&s->lock);
 633                if (s->valid)
 634                        DMERR("Error reading/writing snapshot");
 635                s->store.drop_snapshot(&s->store);
 636                s->valid = 0;
 637                remove_exception(&pe->e);
 638                flush = __flush_bios(pe);
 639                up_write(&s->lock);
 640
 641                error_bios(bio_list_get(&pe->snapshot_bios));
 642
 643                dm_table_event(s->table);
 644        }
 645
 646 out:
 647        free_pending_exception(pe);
 648
 649        if (flush)
 650                flush_bios(flush);
 651}
 652
 653static void commit_callback(void *context, int success)
 654{
 655        struct pending_exception *pe = (struct pending_exception *) context;
 656        pending_complete(pe, success);
 657}
 658
 659/*
 660 * Called when the copy I/O has finished.  kcopyd actually runs
 661 * this code so don't block.
 662 */
 663static void copy_callback(int read_err, unsigned int write_err, void *context)
 664{
 665        struct pending_exception *pe = (struct pending_exception *) context;
 666        struct dm_snapshot *s = pe->snap;
 667
 668        if (read_err || write_err)
 669                pending_complete(pe, 0);
 670
 671        else
 672                /* Update the metadata if we are persistent */
 673                s->store.commit_exception(&s->store, &pe->e, commit_callback,
 674                                          pe);
 675}
 676
 677/*
 678 * Dispatches the copy operation to kcopyd.
 679 */
 680static inline void start_copy(struct pending_exception *pe)
 681{
 682        struct dm_snapshot *s = pe->snap;
 683        struct io_region src, dest;
 684        struct block_device *bdev = s->origin->bdev;
 685        sector_t dev_size;
 686
 687        dev_size = get_dev_size(bdev);
 688
 689        src.bdev = bdev;
 690        src.sector = chunk_to_sector(s, pe->e.old_chunk);
 691        src.count = min(s->chunk_size, dev_size - src.sector);
 692
 693        dest.bdev = s->cow->bdev;
 694        dest.sector = chunk_to_sector(s, pe->e.new_chunk);
 695        dest.count = src.count;
 696
 697        /* Hand over to kcopyd */
 698        kcopyd_copy(s->kcopyd_client,
 699                    &src, 1, &dest, 0, copy_callback, pe);
 700}
 701
 702/*
 703 * Looks to see if this snapshot already has a pending exception
 704 * for this chunk, otherwise it allocates a new one and inserts
 705 * it into the pending table.
 706 *
 707 * NOTE: a write lock must be held on snap->lock before calling
 708 * this.
 709 */
 710static struct pending_exception *
 711__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
 712{
 713        struct exception *e;
 714        struct pending_exception *pe;
 715        chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
 716
 717        /*
 718         * Is there a pending exception for this already ?
 719         */
 720        e = lookup_exception(&s->pending, chunk);
 721        if (e) {
 722                /* cast the exception to a pending exception */
 723                pe = container_of(e, struct pending_exception, e);
 724
 725        } else {
 726                /*
 727                 * Create a new pending exception, we don't want
 728                 * to hold the lock while we do this.
 729                 */
 730                up_write(&s->lock);
 731                pe = alloc_pending_exception();
 732                down_write(&s->lock);
 733
 734                e = lookup_exception(&s->pending, chunk);
 735                if (e) {
 736                        free_pending_exception(pe);
 737                        pe = container_of(e, struct pending_exception, e);
 738                } else {
 739                        pe->e.old_chunk = chunk;
 740                        bio_list_init(&pe->origin_bios);
 741                        bio_list_init(&pe->snapshot_bios);
 742                        INIT_LIST_HEAD(&pe->siblings);
 743                        pe->snap = s;
 744                        pe->started = 0;
 745
 746                        if (s->store.prepare_exception(&s->store, &pe->e)) {
 747                                free_pending_exception(pe);
 748                                s->valid = 0;
 749                                return NULL;
 750                        }
 751
 752                        insert_exception(&s->pending, &pe->e);
 753                }
 754        }
 755
 756        return pe;
 757}
 758
 759static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
 760                                   struct bio *bio)
 761{
 762        bio->bi_bdev = s->cow->bdev;
 763        bio->bi_sector = chunk_to_sector(s, e->new_chunk) +
 764                (bio->bi_sector & s->chunk_mask);
 765}
 766
 767static int snapshot_map(struct dm_target *ti, struct bio *bio,
 768                        union map_info *map_context)
 769{
 770        struct exception *e;
 771        struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 772        int r = 1;
 773        chunk_t chunk;
 774        struct pending_exception *pe;
 775
 776        chunk = sector_to_chunk(s, bio->bi_sector);
 777
 778        /* Full snapshots are not usable */
 779        if (!s->valid)
 780                return -1;
 781
 782        /*
 783         * Write to snapshot - higher level takes care of RW/RO
 784         * flags so we should only get this if we are
 785         * writeable.
 786         */
 787        if (bio_rw(bio) == WRITE) {
 788
 789                /* FIXME: should only take write lock if we need
 790                 * to copy an exception */
 791                down_write(&s->lock);
 792
 793                /* If the block is already remapped - use that, else remap it */
 794                e = lookup_exception(&s->complete, chunk);
 795                if (e) {
 796                        remap_exception(s, e, bio);
 797                        up_write(&s->lock);
 798
 799                } else {
 800                        pe = __find_pending_exception(s, bio);
 801
 802                        if (!pe) {
 803                                if (s->store.drop_snapshot)
 804                                        s->store.drop_snapshot(&s->store);
 805                                s->valid = 0;
 806                                r = -EIO;
 807                                up_write(&s->lock);
 808                        } else {
 809                                remap_exception(s, &pe->e, bio);
 810                                bio_list_add(&pe->snapshot_bios, bio);
 811
 812                                if (!pe->started) {
 813                                        /* this is protected by snap->lock */
 814                                        pe->started = 1;
 815                                        up_write(&s->lock);
 816                                        start_copy(pe);
 817                                } else
 818                                        up_write(&s->lock);
 819                                r = 0;
 820                        }
 821                }
 822
 823        } else {
 824                /*
 825                 * FIXME: this read path scares me because we
 826                 * always use the origin when we have a pending
 827                 * exception.  However I can't think of a
 828                 * situation where this is wrong - ejt.
 829                 */
 830
 831                /* Do reads */
 832                down_read(&s->lock);
 833
 834                /* See if it it has been remapped */
 835                e = lookup_exception(&s->complete, chunk);
 836                if (e)
 837                        remap_exception(s, e, bio);
 838                else
 839                        bio->bi_bdev = s->origin->bdev;
 840
 841                up_read(&s->lock);
 842        }
 843
 844        return r;
 845}
 846
 847static void snapshot_resume(struct dm_target *ti)
 848{
 849        struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 850
 851        if (s->have_metadata)
 852                return;
 853
 854        if (s->store.read_metadata(&s->store)) {
 855                down_write(&s->lock);
 856                s->valid = 0;
 857                up_write(&s->lock);
 858        }
 859
 860        s->have_metadata = 1;
 861}
 862
 863static int snapshot_status(struct dm_target *ti, status_type_t type,
 864                           char *result, unsigned int maxlen)
 865{
 866        struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
 867        char cow[32];
 868        char org[32];
 869
 870        switch (type) {
 871        case STATUSTYPE_INFO:
 872                if (!snap->valid)
 873                        snprintf(result, maxlen, "Invalid");
 874                else {
 875                        if (snap->store.fraction_full) {
 876                                sector_t numerator, denominator;
 877                                snap->store.fraction_full(&snap->store,
 878                                                          &numerator,
 879                                                          &denominator);
 880                                snprintf(result, maxlen,
 881                                         SECTOR_FORMAT "/" SECTOR_FORMAT,
 882                                         numerator, denominator);
 883                        }
 884                        else
 885                                snprintf(result, maxlen, "Unknown");
 886                }
 887                break;
 888
 889        case STATUSTYPE_TABLE:
 890                /*
 891                 * kdevname returns a static pointer so we need
 892                 * to make private copies if the output is to
 893                 * make sense.
 894                 */
 895                format_dev_t(cow, snap->cow->bdev->bd_dev);
 896                format_dev_t(org, snap->origin->bdev->bd_dev);
 897                snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT, org, cow,
 898                         snap->type, snap->chunk_size);
 899                break;
 900        }
 901
 902        return 0;
 903}
 904
 905/*-----------------------------------------------------------------
 906 * Origin methods
 907 *---------------------------------------------------------------*/
 908static void list_merge(struct list_head *l1, struct list_head *l2)
 909{
 910        struct list_head *l1_n, *l2_p;
 911
 912        l1_n = l1->next;
 913        l2_p = l2->prev;
 914
 915        l1->next = l2;
 916        l2->prev = l1;
 917
 918        l2_p->next = l1_n;
 919        l1_n->prev = l2_p;
 920}
 921
 922static int __origin_write(struct list_head *snapshots, struct bio *bio)
 923{
 924        int r = 1, first = 1;
 925        struct dm_snapshot *snap;
 926        struct exception *e;
 927        struct pending_exception *pe, *last = NULL;
 928        chunk_t chunk;
 929
 930        /* Do all the snapshots on this origin */
 931        list_for_each_entry (snap, snapshots, list) {
 932
 933                /* Only deal with valid snapshots */
 934                if (!snap->valid)
 935                        continue;
 936
 937                down_write(&snap->lock);
 938
 939                /*
 940                 * Remember, different snapshots can have
 941                 * different chunk sizes.
 942                 */
 943                chunk = sector_to_chunk(snap, bio->bi_sector);
 944
 945                /*
 946                 * Check exception table to see if block
 947                 * is already remapped in this snapshot
 948                 * and trigger an exception if not.
 949                 */
 950                e = lookup_exception(&snap->complete, chunk);
 951                if (!e) {
 952                        pe = __find_pending_exception(snap, bio);
 953                        if (!pe) {
 954                                snap->store.drop_snapshot(&snap->store);
 955                                snap->valid = 0;
 956
 957                        } else {
 958                                if (last)
 959                                        list_merge(&pe->siblings,
 960                                                   &last->siblings);
 961
 962                                last = pe;
 963                                r = 0;
 964                        }
 965                }
 966
 967                up_write(&snap->lock);
 968        }
 969
 970        /*
 971         * Now that we have a complete pe list we can start the copying.
 972         */
 973        if (last) {
 974                pe = last;
 975                do {
 976                        down_write(&pe->snap->lock);
 977                        if (first)
 978                                bio_list_add(&pe->origin_bios, bio);
 979                        if (!pe->started) {
 980                                pe->started = 1;
 981                                up_write(&pe->snap->lock);
 982                                start_copy(pe);
 983                        } else
 984                                up_write(&pe->snap->lock);
 985                        first = 0;
 986                        pe = list_entry(pe->siblings.next,
 987                                        struct pending_exception, siblings);
 988
 989                } while (pe != last);
 990        }
 991
 992        return r;
 993}
 994
 995/*
 996 * Called on a write from the origin driver.
 997 */
 998static int do_origin(struct dm_dev *origin, struct bio *bio)
 999{
1000        struct origin *o;
1001        int r = 1;
1002
1003        down_read(&_origins_lock);
1004        o = __lookup_origin(origin->bdev);
1005        if (o)
1006                r = __origin_write(&o->snapshots, bio);
1007        up_read(&_origins_lock);
1008
1009        return r;
1010}
1011
1012/*
1013 * Origin: maps a linear range of a device, with hooks for snapshotting.
1014 */
1015
1016/*
1017 * Construct an origin mapping: <dev_path>
1018 * The context for an origin is merely a 'struct dm_dev *'
1019 * pointing to the real device.
1020 */
1021static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1022{
1023        int r;
1024        struct dm_dev *dev;
1025
1026        if (argc != 1) {
1027                ti->error = "dm-origin: incorrect number of arguments";
1028                return -EINVAL;
1029        }
1030
1031        r = dm_get_device(ti, argv[0], 0, ti->len,
1032                          dm_table_get_mode(ti->table), &dev);
1033        if (r) {
1034                ti->error = "Cannot get target device";
1035                return r;
1036        }
1037
1038        ti->private = dev;
1039        return 0;
1040}
1041
1042static void origin_dtr(struct dm_target *ti)
1043{
1044        struct dm_dev *dev = (struct dm_dev *) ti->private;
1045        dm_put_device(ti, dev);
1046}
1047
1048static int origin_map(struct dm_target *ti, struct bio *bio,
1049                      union map_info *map_context)
1050{
1051        struct dm_dev *dev = (struct dm_dev *) ti->private;
1052        bio->bi_bdev = dev->bdev;
1053
1054        /* Only tell snapshots if this is a write */
1055        return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1;
1056}
1057
/*
 * Smaller of the two values, treating zero as "unset": if one argument
 * is zero the other is returned, and if both are zero the result is
 * zero.
 *
 * Arguments and the whole expansion are parenthesized so the macro can
 * be used safely inside larger expressions.  Note that each argument
 * may be evaluated more than once, so avoid side effects.
 */
#define min_not_zero(l, r) \
        (((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
1059
1060/*
1061 * Set the target "split_io" field to the minimum of all the snapshots'
1062 * chunk sizes.
1063 */
1064static void origin_resume(struct dm_target *ti)
1065{
1066        struct dm_dev *dev = (struct dm_dev *) ti->private;
1067        struct dm_snapshot *snap;
1068        struct origin *o;
1069        chunk_t chunk_size = 0;
1070
1071        down_read(&_origins_lock);
1072        o = __lookup_origin(dev->bdev);
1073        if (o)
1074                list_for_each_entry (snap, &o->snapshots, list)
1075                        chunk_size = min_not_zero(chunk_size, snap->chunk_size);
1076        up_read(&_origins_lock);
1077
1078        ti->split_io = chunk_size;
1079}
1080
1081static int origin_status(struct dm_target *ti, status_type_t type, char *result,
1082                         unsigned int maxlen)
1083{
1084        struct dm_dev *dev = (struct dm_dev *) ti->private;
1085        char buffer[32];
1086
1087        switch (type) {
1088        case STATUSTYPE_INFO:
1089                result[0] = '\0';
1090                break;
1091
1092        case STATUSTYPE_TABLE:
1093                format_dev_t(buffer, dev->bdev->bd_dev);
1094                snprintf(result, maxlen, "%s", buffer);
1095                break;
1096        }
1097
1098        return 0;
1099}
1100
1101static struct target_type origin_target = {
1102        .name    = "snapshot-origin",
1103        .version = {1, 0, 1},
1104        .module  = THIS_MODULE,
1105        .ctr     = origin_ctr,
1106        .dtr     = origin_dtr,
1107        .map     = origin_map,
1108        .resume  = origin_resume,
1109        .status  = origin_status,
1110};
1111
1112static struct target_type snapshot_target = {
1113        .name    = "snapshot",
1114        .version = {1, 0, 1},
1115        .module  = THIS_MODULE,
1116        .ctr     = snapshot_ctr,
1117        .dtr     = snapshot_dtr,
1118        .map     = snapshot_map,
1119        .resume  = snapshot_resume,
1120        .status  = snapshot_status,
1121};
1122
1123static int __init dm_snapshot_init(void)
1124{
1125        int r;
1126
1127        r = dm_register_target(&snapshot_target);
1128        if (r) {
1129                DMERR("snapshot target register failed %d", r);
1130                return r;
1131        }
1132
1133        r = dm_register_target(&origin_target);
1134        if (r < 0) {
1135                DMERR("Device mapper: Origin: register failed %d\n", r);
1136                goto bad1;
1137        }
1138
1139        r = init_origin_hash();
1140        if (r) {
1141                DMERR("init_origin_hash failed.");
1142                goto bad2;
1143        }
1144
1145        exception_cache = kmem_cache_create("dm-snapshot-ex",
1146                                            sizeof(struct exception),
1147                                            __alignof__(struct exception),
1148                                            0, NULL, NULL);
1149        if (!exception_cache) {
1150                DMERR("Couldn't create exception cache.");
1151                r = -ENOMEM;
1152                goto bad3;
1153        }
1154
1155        pending_cache =
1156            kmem_cache_create("dm-snapshot-in",
1157                              sizeof(struct pending_exception),
1158                              __alignof__(struct pending_exception),
1159                              0, NULL, NULL);
1160        if (!pending_cache) {
1161                DMERR("Couldn't create pending cache.");
1162                r = -ENOMEM;
1163                goto bad4;
1164        }
1165
1166        pending_pool = mempool_create(128, mempool_alloc_slab,
1167                                      mempool_free_slab, pending_cache);
1168        if (!pending_pool) {
1169                DMERR("Couldn't create pending pool.");
1170                r = -ENOMEM;
1171                goto bad5;
1172        }
1173
1174        return 0;
1175
1176      bad5:
1177        kmem_cache_destroy(pending_cache);
1178      bad4:
1179        kmem_cache_destroy(exception_cache);
1180      bad3:
1181        exit_origin_hash();
1182      bad2:
1183        dm_unregister_target(&origin_target);
1184      bad1:
1185        dm_unregister_target(&snapshot_target);
1186        return r;
1187}
1188
1189static void __exit dm_snapshot_exit(void)
1190{
1191        int r;
1192
1193        r = dm_unregister_target(&snapshot_target);
1194        if (r)
1195                DMERR("snapshot unregister failed %d", r);
1196
1197        r = dm_unregister_target(&origin_target);
1198        if (r)
1199                DMERR("origin unregister failed %d", r);
1200
1201        exit_origin_hash();
1202        mempool_destroy(pending_pool);
1203        kmem_cache_destroy(pending_cache);
1204        kmem_cache_destroy(exception_cache);
1205}
1206
1207/* Module hooks */
1208module_init(dm_snapshot_init);
1209module_exit(dm_snapshot_exit);
1210
1211MODULE_DESCRIPTION(DM_NAME " snapshot target");
1212MODULE_AUTHOR("Joe Thornber");
1213MODULE_LICENSE("GPL");
1214
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.