/* linux/drivers/md/bitmap.c */
/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 */

/*
 * Still to do:
 *
 * flush after percent set rather than just time based. (maybe both).
 * wait if count gets too high, wake when it drops to half.
 */
  18
  19#include <linux/module.h>
  20#include <linux/errno.h>
  21#include <linux/slab.h>
  22#include <linux/init.h>
  23#include <linux/timer.h>
  24#include <linux/sched.h>
  25#include <linux/list.h>
  26#include <linux/file.h>
  27#include <linux/mount.h>
  28#include <linux/buffer_head.h>
  29#include <linux/raid/md.h>
  30#include <linux/raid/bitmap.h>
  31
/* debug macros */

#define DEBUG 0

#if DEBUG
/* these are for debugging purposes only! */

/* define one and only one of these */
/* NOTE(review): some call sites test these with #ifdef, so merely defining
 * them here (even to 0) under DEBUG activates them all at once, which
 * contradicts "one and only one" -- confirm that call sites use #if. */
#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */
#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/
#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */
#define INJECT_FAULTS_4 0 /* undef */
#define INJECT_FAULTS_5 0 /* undef */
#define INJECT_FAULTS_6 0

/* if these are defined, the driver will fail! debug only */
#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */
#define INJECT_FATAL_FAULT_2 0 /* undef */
#define INJECT_FATAL_FAULT_3 0 /* undef */
#endif

//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */
#define DPRINTK(x...) do { } while(0)

/* PRINTK compiles to a KERN_DEBUG printk only when DEBUG is non-zero */
#ifndef PRINTK
#  if DEBUG > 0
#    define PRINTK(x...) printk(KERN_DEBUG x)
#  else
#    define PRINTK(x...)
#  endif
#endif
  63
  64static inline char * bmname(struct bitmap *bitmap)
  65{
  66        return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
  67}
  68
  69
  70/*
  71 * just a placeholder - calls kmalloc for bitmap pages
  72 */
  73static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
  74{
  75        unsigned char *page;
  76
  77#ifdef INJECT_FAULTS_1
  78        page = NULL;
  79#else
  80        page = kmalloc(PAGE_SIZE, GFP_NOIO);
  81#endif
  82        if (!page)
  83                printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
  84        else
  85                PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
  86                        bmname(bitmap), page);
  87        return page;
  88}
  89
/*
 * for now just a placeholder -- just calls kfree for bitmap pages
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	/* log before freeing so the pointer is still meaningful */
	PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);
	kfree(page);
}
  98
/*
 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
 *
 * 1) check to see if this page is allocated, if it's not then try to alloc
 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
 *    page pointer directly as a counter
 *
 * if we find our page, we increment the page's refcount so that it stays
 * allocated while we're using it
 *
 * NOTE(review): caller is expected to hold bitmap->lock; this function
 * drops and re-takes the lock around the (possibly sleeping) allocation
 * and returns with the lock held again -- confirm against callers.
 */
static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		/* NB: if bitmap->pages were 0 this would print ULONG_MAX */
		printk(KERN_ALERT
			"%s: invalid bitmap page request: %lu (> %lu)\n",
			bmname(bitmap), page, bitmap->pages-1);
		return -EINVAL;
	}


	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

	if (bitmap->bp[page].map) /* page is already allocated, just return */
		return 0;

	if (!create)
		return -ENOENT;

	/* drop the lock: the allocation below may sleep */
	spin_unlock_irq(&bitmap->lock);

	/* this page has not been allocated yet */

	if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
		PRINTK("%s: bitmap map page allocation failed, hijacking\n",
			bmname(bitmap));
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		spin_lock_irq(&bitmap->lock);
		/* only hijack if nobody installed a real page meanwhile */
		if (!bitmap->bp[page].map)
			bitmap->bp[page].hijacked = 1;
		goto out;
	}

	/* got a page */

	spin_lock_irq(&bitmap->lock);

	/* recheck the page */

	if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
		/* somebody beat us to getting the page */
		bitmap_free_page(bitmap, mappage);
		return 0;
	}

	/* no page was in place and we have one, so install it */

	memset(mappage, 0, PAGE_SIZE);
	bitmap->bp[page].map = mappage;
	bitmap->missing_pages--;
out:
	return 0;
}
 165
 166
 167/* if page is completely empty, put it back on the free list, or dealloc it */
 168/* if page was hijacked, unmark the flag so it might get alloced next time */
 169/* Note: lock should be held when calling this */
 170static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
 171{
 172        char *ptr;
 173
 174        if (bitmap->bp[page].count) /* page is still busy */
 175                return;
 176
 177        /* page is no longer in use, it can be released */
 178
 179        if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
 180                bitmap->bp[page].hijacked = 0;
 181                bitmap->bp[page].map = NULL;
 182                return;
 183        }
 184
 185        /* normal case, free the page */
 186
 187#if 0
 188/* actually ... let's not.  We will probably need the page again exactly when
 189 * memory is tight and we are flusing to disk
 190 */
 191        return;
 192#else
 193        ptr = bitmap->bp[page].map;
 194        bitmap->bp[page].map = NULL;
 195        bitmap->missing_pages++;
 196        bitmap_free_page(bitmap, ptr);
 197        return;
 198#endif
 199}
 200
 201
/*
 * bitmap file handling - read and write the bitmap file and its superblock
 */

/*
 * basic page I/O operations
 */
 209
 210/* IO operations when bitmap is stored near all superblocks */
 211static struct page *read_sb_page(mddev_t *mddev, long offset,
 212                                 struct page *page,
 213                                 unsigned long index, int size)
 214{
 215        /* choose a good rdev and read the page from there */
 216
 217        mdk_rdev_t *rdev;
 218        struct list_head *tmp;
 219        sector_t target;
 220
 221        if (!page)
 222                page = alloc_page(GFP_KERNEL);
 223        if (!page)
 224                return ERR_PTR(-ENOMEM);
 225
 226        rdev_for_each(rdev, tmp, mddev) {
 227                if (! test_bit(In_sync, &rdev->flags)
 228                    || test_bit(Faulty, &rdev->flags))
 229                        continue;
 230
 231                target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
 232
 233                if (sync_page_io(rdev->bdev, target,
 234                                 roundup(size, bdev_hardsect_size(rdev->bdev)),
 235                                 page, READ)) {
 236                        page->index = index;
 237                        attach_page_buffers(page, NULL); /* so that free_buffer will
 238                                                          * quietly no-op */
 239                        return page;
 240                }
 241        }
 242        return ERR_PTR(-EIO);
 243
 244}
 245
/*
 * Contract: pass NULL to start iterating, and keep passing the previously
 * returned rdev.  Each returned rdev has nr_pending elevated (dropped again
 * on the next call), and RCU is NOT held when this returns -- the elevated
 * refcount is what keeps the returned device alive between calls.
 */
static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
	/* Iterate the disks of an mddev, using rcu to protect access to the
	 * linked list, and raising the refcount of devices we return to ensure
	 * they don't disappear while in use.
	 * As devices are only added or removed when raid_disk is < 0 and
	 * nr_pending is 0 and In_sync is clear, the entries we return will
	 * still be in the same position on the list when we re-enter
	 * list_for_each_continue_rcu.
	 */
	struct list_head *pos;
	rcu_read_lock();
	if (rdev == NULL)
		/* start at the beginning */
		pos = &mddev->disks;
	else {
		/* release the previous rdev and start from there. */
		rdev_dec_pending(rdev, mddev);
		pos = &rdev->same_set;
	}
	list_for_each_continue_rcu(pos, &mddev->disks) {
		rdev = list_entry(pos, mdk_rdev_t, same_set);
		if (rdev->raid_disk >= 0 &&
		    test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable devices */
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return rdev;
		}
	}
	rcu_read_unlock();
	return NULL;
}
 280
/*
 * Write one bitmap page to the bitmap area of every active member device.
 * Before each write, verify the bitmap region cannot overlap either the
 * array data or the md superblock.  Returns 0, or -EINVAL on bad layout.
 */
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
	mdk_rdev_t *rdev = NULL;
	mddev_t *mddev = bitmap->mddev;

	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
			int size = PAGE_SIZE;
			/* the last page may be partial; round up to the
			 * device's hard sector size */
			if (page->index == bitmap->file_pages-1)
				size = roundup(bitmap->last_page_size,
					       bdev_hardsect_size(rdev->bdev));
			/* Just make sure we aren't corrupting data or
			 * metadata
			 */
			if (bitmap->offset < 0) {
				/* DATA  BITMAP METADATA  */
				if (bitmap->offset
				    + (long)(page->index * (PAGE_SIZE/512))
				    + size/512 > 0)
					/* bitmap runs in to metadata */
					goto bad_alignment;
				if (rdev->data_offset + mddev->size*2
				    > rdev->sb_start + bitmap->offset)
					/* data runs in to bitmap */
					goto bad_alignment;
			} else if (rdev->sb_start < rdev->data_offset) {
				/* METADATA BITMAP DATA */
				if (rdev->sb_start
				    + bitmap->offset
				    + page->index*(PAGE_SIZE/512) + size/512
				    > rdev->data_offset)
					/* bitmap runs in to data */
					goto bad_alignment;
			} else {
				/* DATA METADATA BITMAP - no problems */
			}
			md_super_write(mddev, rdev,
				       rdev->sb_start + bitmap->offset
				       + page->index * (PAGE_SIZE/512),
				       size,
				       page);
	}

	if (wait)
		md_super_wait(mddev);
	return 0;

 bad_alignment:
	/* NOTE(review): next_active_rdev() returns with RCU already
	 * released, so this rcu_read_unlock() looks unbalanced; the
	 * nr_pending reference on the current rdev also appears to be
	 * leaked on this path (rdev_dec_pending is never called) --
	 * verify against next_active_rdev()'s contract before relying
	 * on this error path. */
	rcu_read_unlock();
	return -EINVAL;
}
 331
 332static void bitmap_file_kick(struct bitmap *bitmap);
 333/*
 334 * write out a page to a file
 335 */
 336static void write_page(struct bitmap *bitmap, struct page *page, int wait)
 337{
 338        struct buffer_head *bh;
 339
 340        if (bitmap->file == NULL) {
 341                switch (write_sb_page(bitmap, page, wait)) {
 342                case -EINVAL:
 343                        bitmap->flags |= BITMAP_WRITE_ERROR;
 344                }
 345        } else {
 346
 347                bh = page_buffers(page);
 348
 349                while (bh && bh->b_blocknr) {
 350                        atomic_inc(&bitmap->pending_writes);
 351                        set_buffer_locked(bh);
 352                        set_buffer_mapped(bh);
 353                        submit_bh(WRITE, bh);
 354                        bh = bh->b_this_page;
 355                }
 356
 357                if (wait) {
 358                        wait_event(bitmap->write_wait,
 359                                   atomic_read(&bitmap->pending_writes)==0);
 360                }
 361        }
 362        if (bitmap->flags & BITMAP_WRITE_ERROR)
 363                bitmap_file_kick(bitmap);
 364}
 365
/*
 * I/O completion callback for bitmap buffer_heads (both the initial read
 * and later writes).  Records any failure in bitmap->flags and wakes
 * waiters once the last outstanding I/O finishes.
 */
static void end_bitmap_write(struct buffer_head *bh, int uptodate)
{
	struct bitmap *bitmap = bh->b_private;
	unsigned long flags;

	if (!uptodate) {
		/* runs in interrupt context, so irqsave variant */
		spin_lock_irqsave(&bitmap->lock, flags);
		bitmap->flags |= BITMAP_WRITE_ERROR;
		spin_unlock_irqrestore(&bitmap->lock, flags);
	}
	/* flag must be set before the final decrement wakes a waiter */
	if (atomic_dec_and_test(&bitmap->pending_writes))
		wake_up(&bitmap->write_wait);
}
 379
/* copied from buffer.c */
/*
 * Detach buffer state from @page: clear PG_private, zero the private
 * pointer, then drop the reference that attaching buffers took.
 * Order matters: the reference is dropped last.
 */
static void
__clear_page_buffers(struct page *page)
{
	ClearPagePrivate(page);
	set_page_private(page, 0);
	page_cache_release(page);
}
 388static void free_buffers(struct page *page)
 389{
 390        struct buffer_head *bh = page_buffers(page);
 391
 392        while (bh) {
 393                struct buffer_head *next = bh->b_this_page;
 394                free_buffer_head(bh);
 395                bh = next;
 396        }
 397        __clear_page_buffers(page);
 398        put_page(page);
 399}
 400
/* read a page from a file.
 * We both read the page, and attach buffers to the page to record the
 * address of each block (using bmap).  These addresses will be used
 * to write the block later, completely bypassing the filesystem.
 * This usage is similar to how swap files are handled, and allows us
 * to write to a file with no concerns of memory allocation failing.
 */
static struct page *read_page(struct file *file, unsigned long index,
                              struct bitmap *bitmap,
                              unsigned long count)
{
	struct page *page = NULL;
	struct inode *inode = file->f_path.dentry->d_inode;
	struct buffer_head *bh;
	sector_t block;

	PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE,
			(unsigned long long)index << PAGE_SHIFT);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		page = ERR_PTR(-ENOMEM);
	if (IS_ERR(page))
		goto out;

	/* one buffer_head per filesystem block in the page */
	bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
	if (!bh) {
		put_page(page);
		page = ERR_PTR(-ENOMEM);
		goto out;
	}
	attach_page_buffers(page, bh);
	/* first filesystem block covered by this page */
	block = index << (PAGE_SHIFT - inode->i_blkbits);
	while (bh) {
		if (count == 0)
			/* past EOF: mark block as unused (blocknr 0) */
			bh->b_blocknr = 0;
		else {
			/* resolve the file block to an on-disk block; a hole
			 * (bmap returns 0) means we cannot bypass the fs */
			bh->b_blocknr = bmap(inode, block);
			if (bh->b_blocknr == 0) {
				/* Cannot use this file! */
				free_buffers(page);
				page = ERR_PTR(-EINVAL);
				goto out;
			}
			bh->b_bdev = inode->i_sb->s_bdev;
			if (count < (1<<inode->i_blkbits))
				count = 0;
			else
				count -= (1<<inode->i_blkbits);

			bh->b_end_io = end_bitmap_write;
			bh->b_private = bitmap;
			/* the read completions are counted/awaited via the
			 * same mechanism as writes */
			atomic_inc(&bitmap->pending_writes);
			set_buffer_locked(bh);
			set_buffer_mapped(bh);
			submit_bh(READ, bh);
		}
		block++;
		bh = bh->b_this_page;
	}
	page->index = index;

	wait_event(bitmap->write_wait,
		   atomic_read(&bitmap->pending_writes)==0);
	if (bitmap->flags & BITMAP_WRITE_ERROR) {
		free_buffers(page);
		page = ERR_PTR(-EIO);
	}
out:
	if (IS_ERR(page))
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
			(int)PAGE_SIZE,
			(unsigned long long)index << PAGE_SHIFT,
			PTR_ERR(page));
	return page;
}
 477
/*
 * bitmap file superblock operations
 */

/* update the event counter and sync the superblock to disk */
void bitmap_update_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;
	unsigned long flags;

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return;
	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* no superblock */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return;
	}
	/* NOTE(review): the lock is dropped before sb_page is used below;
	 * presumably callers guarantee the page cannot disappear here --
	 * confirm against bitmap teardown paths. */
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (bitmap->mddev->events < bitmap->events_cleared) {
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
		sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	}
	kunmap_atomic(sb, KM_USER0);
	write_page(bitmap, bitmap->sb_page, 1);
}
 506
/* print out the bitmap file superblock */
void bitmap_print_sb(struct bitmap *bitmap)
{
	bitmap_super_t *sb;

	if (!bitmap || !bitmap->sb_page)
		return;
	/* atomic mapping is fine: we only printk while mapped */
	sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
					*(__u32 *)(sb->uuid+0),
					*(__u32 *)(sb->uuid+4),
					*(__u32 *)(sb->uuid+8),
					*(__u32 *)(sb->uuid+12));
	printk(KERN_DEBUG "        events: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events));
	printk(KERN_DEBUG "events cleared: %llu\n",
			(unsigned long long) le64_to_cpu(sb->events_cleared));
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
			(unsigned long long)le64_to_cpu(sb->sync_size)/2);
	printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
	kunmap_atomic(sb, KM_USER0);
}
 535
/* read the superblock from the bitmap file and initialize some bitmap fields */
/*
 * Reads page 0 (from the external file or the in-device bitmap area),
 * validates it, and copies chunksize/daemon_sleep/write_behind/state into
 * the in-core bitmap.  Returns 0 on success or a negative errno; on
 * validation failure the superblock contents are dumped for diagnosis.
 */
static int bitmap_read_sb(struct bitmap *bitmap)
{
	char *reason = NULL;
	bitmap_super_t *sb;
	unsigned long chunksize, daemon_sleep, write_behind;
	unsigned long long events;
	int err = -EINVAL;

	/* page 0 is the superblock, read it... */
	if (bitmap->file) {
		/* external file: never read past EOF or more than one page */
		loff_t isize = i_size_read(bitmap->file->f_mapping->host);
		int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;

		bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes);
	} else {
		bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset,
					       NULL,
					       0, sizeof(bitmap_super_t));
	}
	if (IS_ERR(bitmap->sb_page)) {
		err = PTR_ERR(bitmap->sb_page);
		bitmap->sb_page = NULL;
		return err;
	}

	sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);

	chunksize = le32_to_cpu(sb->chunksize);
	daemon_sleep = le32_to_cpu(sb->daemon_sleep);
	write_behind = le32_to_cpu(sb->write_behind);

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
		reason = "unrecognized superblock version";
	else if (chunksize < PAGE_SIZE)
		reason = "bitmap chunksize too small";
	else if ((1 << ffz(~chunksize)) != chunksize)
		reason = "bitmap chunksize not a power of 2";
	else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ)
		reason = "daemon sleep period out of range";
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

	if (!bitmap->mddev->persistent)
		goto success;

	/*
	 * if we have a persistent array superblock, compare the
	 * bitmap's UUID and event counter to the mddev's
	 */
	if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
		printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
			bmname(bitmap));
		goto out;
	}
	events = le64_to_cpu(sb->events);
	if (events < bitmap->mddev->events) {
		/* stale bitmap: mark it so, which forces full recovery */
		printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
			"-- forcing full recovery\n", bmname(bitmap), events,
			(unsigned long long) bitmap->mddev->events);
		sb->state |= cpu_to_le32(BITMAP_STALE);
	}
success:
	/* assign fields using values from superblock */
	bitmap->chunksize = chunksize;
	bitmap->daemon_sleep = daemon_sleep;
	bitmap->daemon_lastrun = jiffies;
	bitmap->max_write_behind = write_behind;
	bitmap->flags |= le32_to_cpu(sb->state);
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		bitmap->flags |= BITMAP_HOSTENDIAN;
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	if (sb->state & cpu_to_le32(BITMAP_STALE))
		bitmap->events_cleared = bitmap->mddev->events;
	err = 0;
out:
	kunmap_atomic(sb, KM_USER0);
	if (err)
		bitmap_print_sb(bitmap);
	return err;
}
 629
/* operation selector for bitmap_mask_state() */
enum bitmap_mask_op {
	MASK_SET,
	MASK_UNSET
};
 634
/* record the state of the bitmap in the superblock.  Return the old value */
static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
			     enum bitmap_mask_op op)
{
	bitmap_super_t *sb;
	unsigned long flags;
	int old;

	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* can't set the state */
		spin_unlock_irqrestore(&bitmap->lock, flags);
		return 0;
	}
	/* NOTE(review): as in bitmap_update_sb(), the lock is released
	 * before sb_page is mapped and modified below; the read-modify-write
	 * of sb->state is therefore unserialized -- confirm callers. */
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
	old = le32_to_cpu(sb->state) & bits;	/* previous value of @bits */
	switch (op) {
		case MASK_SET: sb->state |= cpu_to_le32(bits);
				break;
		case MASK_UNSET: sb->state &= cpu_to_le32(~bits);
				break;
		default: BUG();
	}
	kunmap_atomic(sb, KM_USER0);
	return old;
}
 661
 662/*
 663 * general bitmap file operations
 664 */
 665
 666/* calculate the index of the page that contains this bit */
 667static inline unsigned long file_page_index(unsigned long chunk)
 668{
 669        return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
 670}
 671
 672/* calculate the (bit) offset of this bit within a page */
 673static inline unsigned long file_page_offset(unsigned long chunk)
 674{
 675        return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
 676}
 677
 678/*
 679 * return a pointer to the page in the filemap that contains the given bit
 680 *
 681 * this lookup is complicated by the fact that the bitmap sb might be exactly
 682 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
 683 * 0 or page 1
 684 */
 685static inline struct page *filemap_get_page(struct bitmap *bitmap,
 686                                        unsigned long chunk)
 687{
 688        if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
 689        return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
 690}
 691
 692
/*
 * Tear down the in-core mapping of the bitmap file: detach filemap,
 * attributes and sb_page from the bitmap under the lock, then free them
 * outside the lock.
 */
static void bitmap_file_unmap(struct bitmap *bitmap)
{
	struct page **map, *sb_page;
	unsigned long *attr;
	int pages;
	unsigned long flags;

	/* detach everything first so concurrent users see a consistent
	 * (empty) state; actual freeing happens after the lock is dropped */
	spin_lock_irqsave(&bitmap->lock, flags);
	map = bitmap->filemap;
	bitmap->filemap = NULL;
	attr = bitmap->filemap_attr;
	bitmap->filemap_attr = NULL;
	pages = bitmap->file_pages;
	bitmap->file_pages = 0;
	sb_page = bitmap->sb_page;
	bitmap->sb_page = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	while (pages--)
		if (map[pages]->index != 0) /* 0 is sb_page, release it below */
			free_buffers(map[pages]);
	kfree(map);
	kfree(attr);

	if (sb_page)
		free_buffers(sb_page);
}
 720
/*
 * Stop using the bitmap file: wait for in-flight I/O, unmap the in-core
 * pages, invalidate the pagecache and drop the file reference.
 */
static void bitmap_file_put(struct bitmap *bitmap)
{
	struct file *file;
	unsigned long flags;

	spin_lock_irqsave(&bitmap->lock, flags);
	file = bitmap->file;
	bitmap->file = NULL;
	spin_unlock_irqrestore(&bitmap->lock, flags);

	/* must drain pending writes before the pages go away */
	if (file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	bitmap_file_unmap(bitmap);

	if (file) {
		struct inode *inode = file->f_path.dentry->d_inode;
		/* we wrote past the filesystem; drop stale cached pages */
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		fput(file);
	}
}
 742
 743
 744/*
 745 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
 746 * then it is no longer reliable, so we stop using it and we mark the file
 747 * as failed in the superblock
 748 */
 749static void bitmap_file_kick(struct bitmap *bitmap)
 750{
 751        char *path, *ptr = NULL;
 752
 753        if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) {
 754                bitmap_update_sb(bitmap);
 755
 756                if (bitmap->file) {
 757                        path = kmalloc(PAGE_SIZE, GFP_KERNEL);
 758                        if (path)
 759                                ptr = d_path(&bitmap->file->f_path, path,
 760                                             PAGE_SIZE);
 761
 762
 763                        printk(KERN_ALERT
 764                              "%s: kicking failed bitmap file %s from array!\n",
 765                              bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
 766
 767                        kfree(path);
 768                } else
 769                        printk(KERN_ALERT
 770                               "%s: disabling internal bitmap due to errors\n",
 771                               bmname(bitmap));
 772        }
 773
 774        bitmap_file_put(bitmap);
 775
 776        return;
 777}
 778
/* per-page attribute bits, stored 4-per-page in bitmap->filemap_attr */
enum bitmap_page_attr {
	BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */
	BITMAP_PAGE_CLEAN = 1, /* there are bits that might need to be cleared */
	BITMAP_PAGE_NEEDWRITE=2, /* there are cleared bits that need to be synced */
};
 784
 785static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
 786                                enum bitmap_page_attr attr)
 787{
 788        __set_bit((page->index<<2) + attr, bitmap->filemap_attr);
 789}
 790
 791static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
 792                                enum bitmap_page_attr attr)
 793{
 794        __clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
 795}
 796
 797static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
 798                                           enum bitmap_page_attr attr)
 799{
 800        return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
 801}
 802
 803/*
 804 * bitmap_file_set_bit -- called before performing a write to the md device
 805 * to set (and eventually sync) a particular bit in the bitmap file
 806 *
 807 * we set the bit immediately, then we record the page number so that
 808 * when an unplug occurs, we can flush the dirty pages out to disk
 809 */
 810static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 811{
 812        unsigned long bit;
 813        struct page *page;
 814        void *kaddr;
 815        unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
 816
 817        if (!bitmap->filemap) {
 818                return;
 819        }
 820
 821        page = filemap_get_page(bitmap, chunk);
 822        if (!page) return;
 823        bit = file_page_offset(chunk);
 824
 825        /* set the bit */
 826        kaddr = kmap_atomic(page, KM_USER0);
 827        if (bitmap->flags & BITMAP_HOSTENDIAN)
 828                set_bit(bit, kaddr);
 829        else
 830                ext2_set_bit(bit, kaddr);
 831        kunmap_atomic(kaddr, KM_USER0);
 832        PRINTK("set file bit %lu page %lu\n", bit, page->index);
 833
 834        /* record page number so it gets flushed to disk when unplug occurs */
 835        set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
 836
 837}
 838
/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
void bitmap_unplug(struct bitmap *bitmap)
{
        unsigned long i, flags;
        int dirty, need_write;
        struct page *page;
        int wait = 0;

        if (!bitmap)
                return;

        /* look at each page to see if there are any set bits that need to be
         * flushed out to disk */
        for (i = 0; i < bitmap->file_pages; i++) {
                spin_lock_irqsave(&bitmap->lock, flags);
                if (!bitmap->filemap) {
                        /* the file map was torn down under us -- nothing left to flush */
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        return;
                }
                page = bitmap->filemap[i];
                /* sample and clear both attributes while holding the lock */
                dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
                need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
                clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
                clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
                if (dirty)
                        wait = 1;
                spin_unlock_irqrestore(&bitmap->lock, flags);

                /* issue the write outside the lock; bitwise OR is fine since
                 * both flags were already evaluated above */
                if (dirty | need_write)
                        write_page(bitmap, page, 0);
        }
        if (wait) { /* if any writes were performed, we need to wait on them */
                if (bitmap->file)
                        wait_event(bitmap->write_wait,
                                   atomic_read(&bitmap->pending_writes)==0);
                else
                        md_super_wait(bitmap->mddev);
        }
        if (bitmap->flags & BITMAP_WRITE_ERROR)
                bitmap_file_kick(bitmap);
}
 882
 883static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
 884/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
 885 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
 886 * memory mapping of the bitmap file
 887 * Special cases:
 888 *   if there's no bitmap file, or if the bitmap file had been
 889 *   previously kicked from the array, we mark all the bits as
 890 *   1's in order to cause a full resync.
 891 *
 892 * We ignore all bits for sectors that end earlier than 'start'.
 893 * This is used when reading an out-of-date bitmap...
 894 */
 895static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 896{
 897        unsigned long i, chunks, index, oldindex, bit;
 898        struct page *page = NULL, *oldpage = NULL;
 899        unsigned long num_pages, bit_cnt = 0;
 900        struct file *file;
 901        unsigned long bytes, offset;
 902        int outofdate;
 903        int ret = -ENOSPC;
 904        void *paddr;
 905
 906        chunks = bitmap->chunks;
 907        file = bitmap->file;
 908
 909        BUG_ON(!file && !bitmap->offset);
 910
 911#ifdef INJECT_FAULTS_3
 912        outofdate = 1;
 913#else
 914        outofdate = bitmap->flags & BITMAP_STALE;
 915#endif
 916        if (outofdate)
 917                printk(KERN_INFO "%s: bitmap file is out of date, doing full "
 918                        "recovery\n", bmname(bitmap));
 919
 920        bytes = (chunks + 7) / 8;
 921
 922        num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
 923
 924        if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
 925                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
 926                        bmname(bitmap),
 927                        (unsigned long) i_size_read(file->f_mapping->host),
 928                        bytes + sizeof(bitmap_super_t));
 929                goto err;
 930        }
 931
 932        ret = -ENOMEM;
 933
 934        bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
 935        if (!bitmap->filemap)
 936                goto err;
 937
 938        /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */
 939        bitmap->filemap_attr = kzalloc(
 940                roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
 941                GFP_KERNEL);
 942        if (!bitmap->filemap_attr)
 943                goto err;
 944
 945        oldindex = ~0L;
 946
 947        for (i = 0; i < chunks; i++) {
 948                int b;
 949                index = file_page_index(i);
 950                bit = file_page_offset(i);
 951                if (index != oldindex) { /* this is a new page, read it in */
 952                        int count;
 953                        /* unmap the old page, we're done with it */
 954                        if (index == num_pages-1)
 955                                count = bytes + sizeof(bitmap_super_t)
 956                                        - index * PAGE_SIZE;
 957                        else
 958                                count = PAGE_SIZE;
 959                        if (index == 0) {
 960                                /*
 961                                 * if we're here then the superblock page
 962                                 * contains some bits (PAGE_SIZE != sizeof sb)
 963                                 * we've already read it in, so just use it
 964                                 */
 965                                page = bitmap->sb_page;
 966                                offset = sizeof(bitmap_super_t);
 967                                read_sb_page(bitmap->mddev, bitmap->offset,
 968                                             page,
 969                                             index, count);
 970                        } else if (file) {
 971                                page = read_page(file, index, bitmap, count);
 972                                offset = 0;
 973                        } else {
 974                                page = read_sb_page(bitmap->mddev, bitmap->offset,
 975                                                    NULL,
 976                                                    index, count);
 977                                offset = 0;
 978                        }
 979                        if (IS_ERR(page)) { /* read error */
 980                                ret = PTR_ERR(page);
 981                                goto err;
 982                        }
 983
 984                        oldindex = index;
 985                        oldpage = page;
 986
 987                        if (outofdate) {
 988                                /*
 989                                 * if bitmap is out of date, dirty the
 990                                 * whole page and write it out
 991                                 */
 992                                paddr = kmap_atomic(page, KM_USER0);
 993                                memset(paddr + offset, 0xff,
 994                                       PAGE_SIZE - offset);
 995                                kunmap_atomic(paddr, KM_USER0);
 996                                write_page(bitmap, page, 1);
 997
 998                                ret = -EIO;
 999                                if (bitmap->flags & BITMAP_WRITE_ERROR) {
1000                                        /* release, page not in filemap yet */
1001                                        put_page(page);
1002                                        goto err;
1003                                }
1004                        }
1005
1006                        bitmap->filemap[bitmap->file_pages++] = page;
1007                        bitmap->last_page_size = count;
1008                }
1009                paddr = kmap_atomic(page, KM_USER0);
1010                if (bitmap->flags & BITMAP_HOSTENDIAN)
1011                        b = test_bit(bit, paddr);
1012                else
1013                        b = ext2_test_bit(bit, paddr);
1014                kunmap_atomic(paddr, KM_USER0);
1015                if (b) {
1016                        /* if the disk bit is set, set the memory bit */
1017                        bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
1018                                               ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
1019                                );
1020                        bit_cnt++;
1021                        set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1022                }
1023        }
1024
1025        /* everything went OK */
1026        ret = 0;
1027        bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
1028
1029        if (bit_cnt) { /* Kick recovery if any bits were set */
1030                set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1031                md_wakeup_thread(bitmap->mddev->thread);
1032        }
1033
1034        printk(KERN_INFO "%s: bitmap initialized from disk: "
1035                "read %lu/%lu pages, set %lu bits\n",
1036                bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt);
1037
1038        return 0;
1039
1040 err:
1041        printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
1042               bmname(bitmap), ret);
1043        return ret;
1044}
1045
1046void bitmap_write_all(struct bitmap *bitmap)
1047{
1048        /* We don't actually write all bitmap blocks here,
1049         * just flag them as needing to be written
1050         */
1051        int i;
1052
1053        for (i=0; i < bitmap->file_pages; i++)
1054                set_page_attr(bitmap, bitmap->filemap[i],
1055                              BITMAP_PAGE_NEEDWRITE);
1056}
1057
1058
1059static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
1060{
1061        sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
1062        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1063        bitmap->bp[page].count += inc;
1064/*
1065        if (page == 0) printk("count page 0, offset %llu: %d gives %d\n",
1066                              (unsigned long long)offset, inc, bitmap->bp[page].count);
1067*/
1068        bitmap_checkfree(bitmap, page);
1069}
1070static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1071                                            sector_t offset, int *blocks,
1072                                            int create);
1073
1074/*
1075 * bitmap daemon -- periodically wakes up to clean bits and flush pages
1076 *                      out to disk
1077 */
1078
1079void bitmap_daemon_work(struct bitmap *bitmap)
1080{
1081        unsigned long j;
1082        unsigned long flags;
1083        struct page *page = NULL, *lastpage = NULL;
1084        int blocks;
1085        void *paddr;
1086
1087        if (bitmap == NULL)
1088                return;
1089        if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
1090                goto done;
1091
1092        bitmap->daemon_lastrun = jiffies;
1093        if (bitmap->allclean) {
1094                bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1095                return;
1096        }
1097        bitmap->allclean = 1;
1098
1099        for (j = 0; j < bitmap->chunks; j++) {
1100                bitmap_counter_t *bmc;
1101                spin_lock_irqsave(&bitmap->lock, flags);
1102                if (!bitmap->filemap) {
1103                        /* error or shutdown */
1104                        spin_unlock_irqrestore(&bitmap->lock, flags);
1105                        break;
1106                }
1107
1108                page = filemap_get_page(bitmap, j);
1109
1110                if (page != lastpage) {
1111                        /* skip this page unless it's marked as needing cleaning */
1112                        if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) {
1113                                int need_write = test_page_attr(bitmap, page,
1114                                                                BITMAP_PAGE_NEEDWRITE);
1115                                if (need_write)
1116                                        clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
1117
1118                                spin_unlock_irqrestore(&bitmap->lock, flags);
1119                                if (need_write) {
1120                                        write_page(bitmap, page, 0);
1121                                        bitmap->allclean = 0;
1122                                }
1123                                continue;
1124                        }
1125
1126                        /* grab the new page, sync and release the old */
1127                        if (lastpage != NULL) {
1128                                if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1129                                        clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1130                                        spin_unlock_irqrestore(&bitmap->lock, flags);
1131                                        write_page(bitmap, lastpage, 0);
1132                                } else {
1133                                        set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1134                                        spin_unlock_irqrestore(&bitmap->lock, flags);
1135                                }
1136                        } else
1137                                spin_unlock_irqrestore(&bitmap->lock, flags);
1138                        lastpage = page;
1139
1140                        /* We are possibly going to clear some bits, so make
1141                         * sure that events_cleared is up-to-date.
1142                         */
1143                        if (bitmap->need_sync) {
1144                                bitmap_super_t *sb;
1145                                bitmap->need_sync = 0;
1146                                sb = kmap_atomic(bitmap->sb_page, KM_USER0);
1147                                sb->events_cleared =
1148                                        cpu_to_le64(bitmap->events_cleared);
1149                                kunmap_atomic(sb, KM_USER0);
1150                                write_page(bitmap, bitmap->sb_page, 1);
1151                        }
1152                        spin_lock_irqsave(&bitmap->lock, flags);
1153                        clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1154                }
1155                bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
1156                                        &blocks, 0);
1157                if (bmc) {
1158/*
1159  if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
1160*/
1161                        if (*bmc)
1162                                bitmap->allclean = 0;
1163
1164                        if (*bmc == 2) {
1165                                *bmc=1; /* maybe clear the bit next time */
1166                                set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1167                        } else if (*bmc == 1) {
1168                                /* we can clear the bit */
1169                                *bmc = 0;
1170                                bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
1171                                                  -1);
1172
1173                                /* clear the bit */
1174                                paddr = kmap_atomic(page, KM_USER0);
1175                                if (bitmap->flags & BITMAP_HOSTENDIAN)
1176                                        clear_bit(file_page_offset(j), paddr);
1177                                else
1178                                        ext2_clear_bit(file_page_offset(j), paddr);
1179                                kunmap_atomic(paddr, KM_USER0);
1180                        }
1181                }
1182                spin_unlock_irqrestore(&bitmap->lock, flags);
1183        }
1184
1185        /* now sync the final page */
1186        if (lastpage != NULL) {
1187                spin_lock_irqsave(&bitmap->lock, flags);
1188                if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
1189                        clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1190                        spin_unlock_irqrestore(&bitmap->lock, flags);
1191                        write_page(bitmap, lastpage, 0);
1192                } else {
1193                        set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1194                        spin_unlock_irqrestore(&bitmap->lock, flags);
1195                }
1196        }
1197
1198 done:
1199        if (bitmap->allclean == 0)
1200                bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
1201}
1202
/*
 * bitmap_get_counter -- return a pointer to the counter that tracks the
 * chunk containing 'offset', or NULL if the counter page is absent (and,
 * with 'create', could not be allocated).  On return, *blocks is the
 * number of blocks from 'offset' to the end of the region the counter
 * (or missing counter) covers.
 */
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
                                            sector_t offset, int *blocks,
                                            int create)
{
        /* If 'create', we might release the lock and reclaim it.
         * The lock must have been taken with interrupts enabled.
         * If !create, we don't release the lock.
         */
        sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
        unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
        unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
        sector_t csize;

        if (bitmap_checkpage(bitmap, page, create) < 0) {
                /* no counter page: report the span of a single chunk */
                csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
                *blocks = csize - (offset & (csize- 1));
                return NULL;
        }
        /* now locked ... */

        if (bitmap->bp[page].hijacked) { /* hijacked pointer */
                /* should we use the first or second counter field
                 * of the hijacked pointer? */
                int hi = (pageoff > PAGE_COUNTER_MASK);
                /* a hijacked entry holds just two counters, each covering
                 * half of the page's normal range */
                csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
                                          PAGE_COUNTER_SHIFT - 1);
                *blocks = csize - (offset & (csize- 1));
                return  &((bitmap_counter_t *)
                          &bitmap->bp[page].map)[hi];
        } else { /* page is allocated */
                csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
                *blocks = csize - (offset & (csize- 1));
                return (bitmap_counter_t *)
                        &(bitmap->bp[page].map[pageoff]);
        }
}
1239
/*
 * bitmap_startwrite -- account for a write of 'sectors' sectors starting at
 * 'offset': bump the counter of each chunk touched, setting the on-disk bit
 * for a chunk's first outstanding write.  Must be balanced by
 * bitmap_endwrite for the same region.  Always returns 0.
 */
int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
        if (!bitmap) return 0;

        if (behind) {
                atomic_inc(&bitmap->behind_writes);
                PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n",
                  atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
        }

        while (sectors) {
                int blocks;
                bitmap_counter_t *bmc;

                spin_lock_irq(&bitmap->lock);
                bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
                if (!bmc) {
                        spin_unlock_irq(&bitmap->lock);
                        return 0;
                }

                /* counter is saturated: wait for bitmap_endwrite to drain it,
                 * then retry the same region */
                if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) {
                        DEFINE_WAIT(__wait);
                        /* note that it is safe to do the prepare_to_wait
                         * after the test as long as we do it before dropping
                         * the spinlock.
                         */
                        prepare_to_wait(&bitmap->overflow_wait, &__wait,
                                        TASK_UNINTERRUPTIBLE);
                        spin_unlock_irq(&bitmap->lock);
                        blk_unplug(bitmap->mddev->queue);
                        schedule();
                        finish_wait(&bitmap->overflow_wait, &__wait);
                        continue;
                }

                switch(*bmc) {
                case 0:
                        /* first outstanding write to this chunk: set the
                         * file bit so it is flushed at unplug time */
                        bitmap_file_set_bit(bitmap, offset);
                        bitmap_count_page(bitmap,offset, 1);
                        blk_plug_device_unlocked(bitmap->mddev->queue);
                        /* fall through */
                case 1:
                        *bmc = 2;
                }

                (*bmc)++;

                spin_unlock_irq(&bitmap->lock);

                offset += blocks;
                if (sectors > blocks)
                        sectors -= blocks;
                else sectors = 0;
        }
        bitmap->allclean = 0;
        return 0;
}
1298
/*
 * bitmap_endwrite -- balance a previous bitmap_startwrite over the same
 * region: drop the per-chunk counters, flag failed chunks as needing
 * resync, and let the daemon consider cleaning drained chunks.
 */
void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
                     int success, int behind)
{
        if (!bitmap) return;
        if (behind) {
                atomic_dec(&bitmap->behind_writes);
                PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
                  atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
        }

        while (sectors) {
                int blocks;
                unsigned long flags;
                bitmap_counter_t *bmc;

                spin_lock_irqsave(&bitmap->lock, flags);
                bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
                if (!bmc) {
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        return;
                }

                /* on success, remember how far events have progressed so
                 * the daemon can sync events_cleared to the superblock
                 * before it starts clearing bits */
                if (success &&
                    bitmap->events_cleared < bitmap->mddev->events) {
                        bitmap->events_cleared = bitmap->mddev->events;
                        bitmap->need_sync = 1;
                }

                if (!success && ! (*bmc & NEEDED_MASK))
                        *bmc |= NEEDED_MASK;

                /* wake any writer stalled in bitmap_startwrite on a
                 * saturated counter */
                if ((*bmc & COUNTER_MAX) == COUNTER_MAX)
                        wake_up(&bitmap->overflow_wait);

                (*bmc)--;
                if (*bmc <= 2) {
                        /* counter has drained; mark the page for the daemon */
                        set_page_attr(bitmap,
                                      filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
                                      BITMAP_PAGE_CLEAN);
                }
                spin_unlock_irqrestore(&bitmap->lock, flags);
                offset += blocks;
                if (sectors > blocks)
                        sectors -= blocks;
                else sectors = 0;
        }
}
1346
1347int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
1348                        int degraded)
1349{
1350        bitmap_counter_t *bmc;
1351        int rv;
1352        if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1353                *blocks = 1024;
1354                return 1; /* always resync if no bitmap */
1355        }
1356        spin_lock_irq(&bitmap->lock);
1357        bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1358        rv = 0;
1359        if (bmc) {
1360                /* locked */
1361                if (RESYNC(*bmc))
1362                        rv = 1;
1363                else if (NEEDED(*bmc)) {
1364                        rv = 1;
1365                        if (!degraded) { /* don't set/clear bits if degraded */
1366                                *bmc |= RESYNC_MASK;
1367                                *bmc &= ~NEEDED_MASK;
1368                        }
1369                }
1370        }
1371        spin_unlock_irq(&bitmap->lock);
1372        bitmap->allclean = 0;
1373        return rv;
1374}
1375
/*
 * bitmap_end_sync -- record that resync of the region at 'offset' is done
 * (or was aborted): clear the RESYNC flag, and either re-flag the chunk as
 * needing resync (abort) or let the daemon consider cleaning it.
 * *blocks is set to the span covered by this counter.
 */
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
        bitmap_counter_t *bmc;
        unsigned long flags;

        if (bitmap == NULL) {
                *blocks = 1024;
                return;
        }
        spin_lock_irqsave(&bitmap->lock, flags);
        bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
        if (bmc == NULL)
                goto unlock;
        /* locked */
        if (RESYNC(*bmc)) {
                *bmc &= ~RESYNC_MASK;

                if (!NEEDED(*bmc) && aborted)
                        *bmc |= NEEDED_MASK;
                else {
                        if (*bmc <= 2) {
                                /* drained; mark the page for the daemon */
                                set_page_attr(bitmap,
                                              filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
                                              BITMAP_PAGE_CLEAN);
                        }
                }
        }
 unlock:
        spin_unlock_irqrestore(&bitmap->lock, flags);
        bitmap->allclean = 0;
}
1411
1412void bitmap_close_sync(struct bitmap *bitmap)
1413{
1414        /* Sync has finished, and any bitmap chunks that weren't synced
1415         * properly have been aborted.  It remains to us to clear the
1416         * RESYNC bit wherever it is still on
1417         */
1418        sector_t sector = 0;
1419        int blocks;
1420        if (!bitmap)
1421                return;
1422        while (sector < bitmap->mddev->resync_max_sectors) {
1423                bitmap_end_sync(bitmap, sector, &blocks, 0);
1424                sector += blocks;
1425        }
1426}
1427
1428void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1429{
1430        sector_t s = 0;
1431        int blocks;
1432
1433        if (!bitmap)
1434                return;
1435        if (sector == 0) {
1436                bitmap->last_end_sync = jiffies;
1437                return;
1438        }
1439        if (time_before(jiffies, (bitmap->last_end_sync
1440                                  + bitmap->daemon_sleep * HZ)))
1441                return;
1442        wait_event(bitmap->mddev->recovery_wait,
1443                   atomic_read(&bitmap->mddev->recovery_active) == 0);
1444
1445        sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
1446        s = 0;
1447        while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1448                bitmap_end_sync(bitmap, s, &blocks, 0);
1449                s += blocks;
1450        }
1451        bitmap->last_end_sync = jiffies;
1452}
1453
/* initialize the in-memory counter for the chunk covering 'offset' to 1,
 * also setting NEEDED_MASK when 'needed' is non-zero; used when loading
 * bits from disk and when dirtying chunks explicitly */
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
        /* For each chunk covered by any of these sectors, set the
         * counter to 1 and set resync_needed.  They should all
         * be 0 at this point
         */

        int secs;
        bitmap_counter_t *bmc;
        spin_lock_irq(&bitmap->lock);
        bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
        if (!bmc) {
                spin_unlock_irq(&bitmap->lock);
                return;
        }
        if (! *bmc) {
                struct page *page;
                *bmc = 1 | (needed?NEEDED_MASK:0);
                bitmap_count_page(bitmap, offset, 1);
                page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
                /* let the daemon consider this page for cleaning later */
                set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
        }
        spin_unlock_irq(&bitmap->lock);
        bitmap->allclean = 0;
}
1479
1480/* dirty the memory and file bits for bitmap chunks "s" to "e" */
1481void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1482{
1483        unsigned long chunk;
1484
1485        for (chunk = s; chunk <= e; chunk++) {
1486                sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
1487                bitmap_set_memory_bits(bitmap, sec, 1);
1488                bitmap_file_set_bit(bitmap, sec);
1489        }
1490}
1491
1492/*
1493 * flush out any pending updates
1494 */
1495void bitmap_flush(mddev_t *mddev)
1496{
1497        struct bitmap *bitmap = mddev->bitmap;
1498        int sleep;
1499
1500        if (!bitmap) /* there was no bitmap */
1501                return;
1502
1503        /* run the daemon_work three time to ensure everything is flushed
1504         * that can be
1505         */
1506        sleep = bitmap->daemon_sleep;
1507        bitmap->daemon_sleep = 0;
1508        bitmap_daemon_work(bitmap);
1509        bitmap_daemon_work(bitmap);
1510        bitmap_daemon_work(bitmap);
1511        bitmap->daemon_sleep = sleep;
1512        bitmap_update_sb(bitmap);
1513}
1514
1515/*
1516 * free memory that was allocated
1517 */
1518static void bitmap_free(struct bitmap *bitmap)
1519{
1520        unsigned long k, pages;
1521        struct bitmap_page *bp;
1522
1523        if (!bitmap) /* there was no bitmap */
1524                return;
1525
1526        /* release the bitmap file and kill the daemon */
1527        bitmap_file_put(bitmap);
1528
1529        bp = bitmap->bp;
1530        pages = bitmap->pages;
1531
1532        /* free all allocated memory */
1533
1534        if (bp) /* deallocate the page memory */
1535                for (k = 0; k < pages; k++)
1536                        if (bp[k].map && !bp[k].hijacked)
1537                                kfree(bp[k].map);
1538        kfree(bp);
1539        kfree(bitmap);
1540}
1541void bitmap_destroy(mddev_t *mddev)
1542{
1543        struct bitmap *bitmap = mddev->bitmap;
1544
1545        if (!bitmap) /* there was no bitmap */
1546                return;
1547
1548        mddev->bitmap = NULL; /* disconnect from the md device */
1549        if (mddev->thread)
1550                mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1551
1552        bitmap_free(bitmap);
1553}
1554
1555/*
1556 * initialize the bitmap structure
1557 * if this returns an error, bitmap_destroy must be called to do clean up
1558 */
1559int bitmap_create(mddev_t *mddev)
1560{
1561        struct bitmap *bitmap;
1562        unsigned long blocks = mddev->resync_max_sectors;
1563        unsigned long chunks;
1564        unsigned long pages;
1565        struct file *file = mddev->bitmap_file;
1566        int err;
1567        sector_t start;
1568
1569        BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1570
1571        if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
1572                return 0;
1573
1574        BUG_ON(file && mddev->bitmap_offset);
1575
1576        bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1577        if (!bitmap)
1578                return -ENOMEM;
1579
1580        spin_lock_init(&bitmap->lock);
1581        atomic_set(&bitmap->pending_writes, 0);
1582        init_waitqueue_head(&bitmap->write_wait);
1583        init_waitqueue_head(&bitmap->overflow_wait);
1584
1585        bitmap->mddev = mddev;
1586
1587        bitmap->file = file;
1588        bitmap->offset = mddev->bitmap_offset;
1589        if (file) {
1590                get_file(file);
1591                do_sync_mapping_range(file->f_mapping, 0, LLONG_MAX,
1592                                      SYNC_FILE_RANGE_WAIT_BEFORE |
1593                                      SYNC_FILE_RANGE_WRITE |
1594                                      SYNC_FILE_RANGE_WAIT_AFTER);
1595        }
1596        /* read superblock from bitmap file (this sets bitmap->chunksize) */
1597        err = bitmap_read_sb(bitmap);
1598        if (err)
1599                goto error;
1600
1601        bitmap->chunkshift = ffz(~bitmap->chunksize);
1602
1603        /* now that chunksize and chunkshift are set, we can use these macros */
1604        chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) /
1605                        CHUNK_BLOCK_RATIO(bitmap);
1606        pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
1607
1608        BUG_ON(!pages);
1609
1610        bitmap->chunks = chunks;
1611        bitmap->pages = pages;
1612        bitmap->missing_pages = pages;
1613        bitmap->counter_bits = COUNTER_BITS;
1614
1615        bitmap->syncchunk = ~0UL;
1616
1617#ifdef INJECT_FATAL_FAULT_1
1618        bitmap->bp = NULL;
1619#else
1620        bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
1621#endif
1622        err = -ENOMEM;
1623        if (!bitmap->bp)
1624                goto error;
1625
1626        /* now that we have some pages available, initialize the in-memory
1627         * bitmap from the on-disk bitmap */
1628        start = 0;
1629        if (mddev->degraded == 0
1630            || bitmap->events_cleared == mddev->events)
1631                /* no need to keep dirty bits to optimise a re-add of a missing device */
1632                start = mddev->recovery_cp;
1633        err = bitmap_init_from_disk(bitmap, start);
1634
1635        if (err)
1636                goto error;
1637
1638        printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1639                pages, bmname(bitmap));
1640
1641        mddev->bitmap = bitmap;
1642
1643        mddev->thread->timeout = bitmap->daemon_sleep * HZ;
1644
1645        bitmap_update_sb(bitmap);
1646
1647        return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;
1648
1649 error:
1650        bitmap_free(bitmap);
1651        return err;
1652}
1653
/* the bitmap API -- for raid personalities */
/* Public entry points of the write-intent bitmap: start/end tracking of
 * in-flight writes, resync bookkeeping, and flushing of pending bitmap
 * page updates (unplug). */
EXPORT_SYMBOL(bitmap_startwrite);
EXPORT_SYMBOL(bitmap_endwrite);
EXPORT_SYMBOL(bitmap_start_sync);
EXPORT_SYMBOL(bitmap_end_sync);
EXPORT_SYMBOL(bitmap_unplug);
EXPORT_SYMBOL(bitmap_close_sync);
EXPORT_SYMBOL(bitmap_cond_end_sync);
1662
/* (web-capture artifact, not part of bitmap.c: "lxr.linux.no kindly hosted
 * by Redpill Linpro AS" hosting footer — commented out so the file compiles) */