linux-old/fs/jfs/jfs_metapage.c
<<
>>
Prefs
   1/*
   2 *   Copyright (C) International Business Machines Corp., 2000-2004
   3 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
   4 *
   5 *   This program is free software;  you can redistribute it and/or modify
   6 *   it under the terms of the GNU General Public License as published by
   7 *   the Free Software Foundation; either version 2 of the License, or 
   8 *   (at your option) any later version.
   9 * 
  10 *   This program is distributed in the hope that it will be useful,
  11 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
  13 *   the GNU General Public License for more details.
  14 *
  15 *   You should have received a copy of the GNU General Public License
  16 *   along with this program;  if not, write to the Free Software 
  17 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18 */
  19
  20#include <linux/fs.h>
  21#include <linux/init.h>
  22#include "jfs_incore.h"
  23#include "jfs_superblock.h"
  24#include "jfs_filsys.h"
  25#include "jfs_metapage.h"
  26#include "jfs_txnmgr.h"
  27#include "jfs_debug.h"
  28
  29extern struct task_struct *jfsCommitTask;
  30static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
  31static wait_queue_head_t meta_wait;
  32
  #ifdef CONFIG_JFS_STATISTICS
  /* Event counters printed by jfs_mpstat_read(). */
  struct {
          uint pagealloc;         /* # of page allocations */
          uint pagefree;          /* # of page frees */
          uint lockwait;          /* # of sleeping lock_metapage() calls */
          uint allocwait;         /* # of sleeping alloc_metapage() calls */
  } mpStat;
  #endif
  41
  42
  43#define HASH_BITS 10            /* This makes hash_table 1 4K page */
  44#define HASH_SIZE (1 << HASH_BITS)
  45static struct metapage **hash_table = NULL;
  46static unsigned long hash_order;
  47
  48
  49static inline int metapage_locked(struct metapage *mp)
  50{
  51        return test_bit(META_locked, &mp->flag);
  52}
  53
  54static inline int trylock_metapage(struct metapage *mp)
  55{
  56        return test_and_set_bit(META_locked, &mp->flag);
  57}
  58
  59static inline void unlock_metapage(struct metapage *mp)
  60{
  61        clear_bit(META_locked, &mp->flag);
  62        wake_up(&mp->wait);
  63}
  64
  65static void __lock_metapage(struct metapage *mp)
  66{
  67        DECLARE_WAITQUEUE(wait, current);
  68
  69        INCREMENT(mpStat.lockwait);
  70
  71        add_wait_queue_exclusive(&mp->wait, &wait);
  72        do {
  73                set_current_state(TASK_UNINTERRUPTIBLE);
  74                if (metapage_locked(mp)) {
  75                        spin_unlock(&meta_lock);
  76                        schedule();
  77                        spin_lock(&meta_lock);
  78                }
  79        } while (trylock_metapage(mp));
  80        __set_current_state(TASK_RUNNING);
  81        remove_wait_queue(&mp->wait, &wait);
  82}
  83
  /*
   * Acquire the metapage lock, sleeping if somebody else holds it.
   * The caller needs to hold meta_lock.
   */
  static inline void lock_metapage(struct metapage *mp)
  {
          /* Fast path: the bit was clear and is now ours. */
          if (!trylock_metapage(mp))
                  return;

          __lock_metapage(mp);
  }
  90
  91/*
  92 * metapage pool is based on Linux 2.5's mempool
  93 *
  94 * Tap into reserved structures in critical paths where waiting on a
  95 * memory allocation could cause deadlock
  96 */
  97#define METAPOOL_MIN_PAGES 32
  98static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
  99static int num_reserved = 0;
 100kmem_cache_t *metapage_cache;
 101
 102static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 103{
 104        struct metapage *mp = (struct metapage *)foo;
 105
 106        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 107            SLAB_CTOR_CONSTRUCTOR) {
 108                mp->lid = 0;
 109                mp->lsn = 0;
 110                mp->flag = 0;
 111                mp->data = NULL;
 112                mp->clsn = 0;
 113                mp->log = NULL;
 114                set_bit(META_free, &mp->flag);
 115                init_waitqueue_head(&mp->wait);
 116        }
 117}
 118
 119static void empty_reserved(void)
 120{
 121        while (num_reserved--)
 122                kmem_cache_free(metapage_cache,
 123                                reserved_metapages[num_reserved]);
 124}
 125
 126static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
 127{
 128        struct metapage *new;
 129
 130        *dropped_lock = 0;
 131
 132        /*
 133         * Always try an atomic alloc first, to avoid dropping the
 134         * spinlock
 135         */
 136        new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
 137        if (new)
 138                return new;
 139
 140        if (no_wait && num_reserved)
 141                return reserved_metapages[--num_reserved];
 142
 143        *dropped_lock = 1;
 144        spin_unlock(&meta_lock);
 145        new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 146        spin_lock(&meta_lock);
 147        return new;
 148}
 149
 150static void __free_metapage(struct metapage *mp)
 151{
 152        mp->flag = 0;
 153        set_bit(META_free, &mp->flag);
 154
 155        if (num_reserved < METAPOOL_MIN_PAGES)
 156                reserved_metapages[num_reserved++] = mp;
 157        else
 158                kmem_cache_free(metapage_cache, mp);
 159}
 160
 161static inline void free_metapage(struct metapage * mp)
 162{
 163        spin_lock(&meta_lock);
 164        __free_metapage(mp);
 165        spin_unlock(&meta_lock);
 166}
 167
 168int __init metapage_init(void)
 169{
 170        struct metapage *mp;
 171
 172        /*
 173         * Initialize wait queue
 174         */
 175        init_waitqueue_head(&meta_wait);
 176
 177        /*
 178         * Allocate the metapage structures
 179         */
 180        metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
 181                                           0, 0, init_once, NULL);
 182        if (metapage_cache == NULL)
 183                return -ENOMEM;
 184
 185        while (num_reserved < METAPOOL_MIN_PAGES) {
 186                mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
 187                if (mp)
 188                        reserved_metapages[num_reserved++] = mp;
 189                else {
 190                        empty_reserved();
 191                        kmem_cache_destroy(metapage_cache);
 192                        return -ENOMEM;
 193                }
 194        }
 195        /*
 196         * Now the hash list
 197         */
 198        for (hash_order = 0;
 199             ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
 200             hash_order++);
 201        hash_table =
 202            (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
 203        assert(hash_table);
 204        memset(hash_table, 0, PAGE_SIZE << hash_order);
 205
 206        return 0;
 207}
 208
 209void metapage_exit(void)
 210{
 211        empty_reserved();
 212        kmem_cache_destroy(metapage_cache);
 213}
 214
 215/*
 216 * Basically same hash as in pagemap.h, but using our hash table
 217 */
 218static struct metapage **meta_hash(struct address_space *mapping,
 219                                   unsigned long index)
 220{
 221#define i (((unsigned long)mapping)/ \
 222           (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
 223#define s(x) ((x) + ((x) >> HASH_BITS))
 224        return hash_table + (s(i + index) & (HASH_SIZE - 1));
 225#undef i
 226#undef s
 227}
 228
 229static struct metapage *search_hash(struct metapage ** hash_ptr,
 230                                    struct address_space *mapping,
 231                               unsigned long index)
 232{
 233        struct metapage *ptr;
 234
 235        for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
 236                if ((ptr->mapping == mapping) && (ptr->index == index))
 237                        return ptr;
 238        }
 239
 240        return NULL;
 241}
 242
 243static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
 244{
 245        if (*hash_ptr)
 246                (*hash_ptr)->hash_prev = mp;
 247
 248        mp->hash_prev = NULL;
 249        mp->hash_next = *hash_ptr;
 250        *hash_ptr = mp;
 251}
 252
 253static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
 254{
 255        if (mp->hash_prev)
 256                mp->hash_prev->hash_next = mp->hash_next;
 257        else {
 258                assert(*hash_ptr == mp);
 259                *hash_ptr = mp->hash_next;
 260        }
 261
 262        if (mp->hash_next)
 263                mp->hash_next->hash_prev = mp->hash_prev;
 264}
 265
 266struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 267                                unsigned int size, int absolute,
 268                                unsigned long new)
 269{
 270        int dropped_lock;
 271        struct metapage **hash_ptr;
 272        int l2BlocksPerPage;
 273        int l2bsize;
 274        int no_wait;
 275        struct address_space *mapping;
 276        struct metapage *mp;
 277        unsigned long page_index;
 278        unsigned long page_offset;
 279
 280        jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);
 281
 282        if (absolute)
 283                mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
 284        else {
 285                /*
 286                 * If an nfs client tries to read an inode that is larger
 287                 * than any existing inodes, we may try to read past the
 288                 * end of the inode map
 289                 */
 290                if ((lblock << inode->i_blkbits) >= inode->i_size)
 291                        return NULL;
 292                mapping = inode->i_mapping;
 293        }
 294
 295        hash_ptr = meta_hash(mapping, lblock);
 296again:
 297        spin_lock(&meta_lock);
 298        mp = search_hash(hash_ptr, mapping, lblock);
 299        if (mp) {
 300              page_found:
 301                mp->count++;
 302                lock_metapage(mp);
 303                spin_unlock(&meta_lock);
 304                if (test_bit(META_stale, &mp->flag)) {
 305                        release_metapage(mp);
 306                        yield();        /* Let other waiters release it, too */
 307                        goto again;
 308                }
 309                if (test_bit(META_discard, &mp->flag)) {
 310                        if (!new) {
 311                                jfs_error(inode->i_sb,
 312                                          "__get_metapage: using a "
 313                                          "discarded metapage");
 314                                release_metapage(mp);
 315                                return NULL;
 316                        }
 317                        clear_bit(META_discard, &mp->flag);
 318                }
 319                jfs_info("__get_metapage: found 0x%p, in hash", mp);
 320                if (mp->logical_size != size) {
 321                        jfs_error(inode->i_sb,
 322                                  "__get_metapage: mp->logical_size != size");
 323                        release_metapage(mp);
 324                        return NULL;
 325                }
 326        } else {
 327                l2bsize = inode->i_blkbits;
 328                l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 329                page_index = lblock >> l2BlocksPerPage;
 330                page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
 331                    l2bsize;
 332                if ((page_offset + size) > PAGE_CACHE_SIZE) {
 333                        spin_unlock(&meta_lock);
 334                        jfs_err("MetaData crosses page boundary!!");
 335                        return NULL;
 336                }
 337                
 338                /*
 339                 * Locks held on aggregate inode pages are usually
 340                 * not held long, and they are taken in critical code
 341                 * paths (committing dirty inodes, txCommit thread) 
 342                 * 
 343                 * Attempt to get metapage without blocking, tapping into
 344                 * reserves if necessary.
 345                 */
 346                if (JFS_IP(inode)->fileset == AGGREGATE_I)
 347                        no_wait = 1;
 348                else
 349                        no_wait = 0;
 350
 351                mp = alloc_metapage(&dropped_lock, no_wait);
 352                if (!mp) {
 353                        spin_unlock(&meta_lock);
 354                        return NULL;
 355                }
 356                if (dropped_lock) {
 357                        /* alloc_metapage blocked, we need to search the hash
 358                         * again.
 359                         */
 360                        struct metapage *mp2;
 361                        mp2 = search_hash(hash_ptr, mapping, lblock);
 362                        if (mp2) {
 363                                __free_metapage(mp);
 364                                mp = mp2;
 365                                goto page_found;
 366                        }
 367                }
 368                mp->flag = 0;
 369                lock_metapage(mp);
 370                if (absolute)
 371                        set_bit(META_absolute, &mp->flag);
 372                mp->xflag = COMMIT_PAGE;
 373                mp->count = 1;
 374                atomic_set(&mp->nohomeok,0);
 375                mp->mapping = mapping;
 376                mp->index = lblock;
 377                mp->page = 0;
 378                mp->logical_size = size;
 379                add_to_hash(mp, hash_ptr);
 380                spin_unlock(&meta_lock);
 381
 382                if (new) {
 383                        jfs_info("__get_metapage: Calling grab_cache_page");
 384                        mp->page = grab_cache_page(mapping, page_index);
 385                        if (!mp->page) {
 386                                jfs_err("grab_cache_page failed!");
 387                                goto freeit;
 388                        } else {
 389                                INCREMENT(mpStat.pagealloc);
 390                                UnlockPage(mp->page);
 391                        }
 392                } else {
 393                        jfs_info("__get_metapage: Calling read_cache_page");
 394                        mp->page = read_cache_page(mapping, lblock,
 395                                    (filler_t *)mapping->a_ops->readpage, NULL);
 396                        if (IS_ERR(mp->page)) {
 397                                jfs_err("read_cache_page failed!");
 398                                goto freeit;
 399                        } else
 400                                INCREMENT(mpStat.pagealloc);
 401                }
 402                mp->data = kmap(mp->page) + page_offset;
 403        }
 404
 405        if (new)
 406                memset(mp->data, 0, PSIZE);
 407
 408        jfs_info("__get_metapage: returning = 0x%p", mp);
 409        return mp;
 410
 411freeit:
 412        spin_lock(&meta_lock);
 413        remove_from_hash(mp, hash_ptr);
 414        __free_metapage(mp);
 415        spin_unlock(&meta_lock);
 416        return NULL;
 417}
 418
 419void hold_metapage(struct metapage * mp, int force)
 420{
 421        spin_lock(&meta_lock);
 422
 423        mp->count++;
 424
 425        if (force) {
 426                ASSERT (!(test_bit(META_forced, &mp->flag)));
 427                if (trylock_metapage(mp))
 428                        set_bit(META_forced, &mp->flag);
 429        } else
 430                lock_metapage(mp);
 431
 432        spin_unlock(&meta_lock);
 433}
 434
 435static void __write_metapage(struct metapage * mp)
 436{
 437        int l2bsize = mp->mapping->host->i_blkbits;
 438        int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
 439        unsigned long page_index;
 440        unsigned long page_offset;
 441        int rc;
 442
 443        jfs_info("__write_metapage: mp = 0x%p", mp);
 444
 445        if (test_bit(META_discard, &mp->flag)) {
 446                /*
 447                 * This metadata is no longer valid
 448                 */
 449                clear_bit(META_dirty, &mp->flag);
 450                return;
 451        }
 452
 453        page_index = mp->page->index;
 454        page_offset =
 455            (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;
 456
 457        lock_page(mp->page);
 458        rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
 459                                               page_offset +
 460                                               mp->logical_size);
 461        if (rc) {
 462                jfs_err("prepare_write return %d!", rc);
 463                ClearPageUptodate(mp->page);
 464                UnlockPage(mp->page);
 465                kunmap(mp->page);
 466                clear_bit(META_dirty, &mp->flag);
 467                return;
 468        }
 469        rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
 470                                              page_offset +
 471                                              mp->logical_size);
 472        if (rc) {
 473                jfs_err("commit_write returned %d", rc);
 474        }
 475
 476        UnlockPage(mp->page);
 477        clear_bit(META_dirty, &mp->flag);
 478
 479        jfs_info("__write_metapage done");
 480}
 481
 482static inline void sync_metapage(struct metapage *mp)
 483{
 484        struct page *page = mp->page;
 485
 486        page_cache_get(page);
 487        lock_page(page);
 488
 489        /* we're done with this page - no need to check for errors */
 490        if (page->buffers) {
 491                writeout_one_page(page);
 492                waitfor_one_page(page);
 493        }
 494
 495        UnlockPage(page);
 496        page_cache_release(page);
 497}
 498
 499void release_metapage(struct metapage * mp)
 500{
 501        struct jfs_log *log;
 502
 503        jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
 504
 505        spin_lock(&meta_lock);
 506        if (test_bit(META_forced, &mp->flag)) {
 507                clear_bit(META_forced, &mp->flag);
 508                mp->count--;
 509                spin_unlock(&meta_lock);
 510                return;
 511        }
 512
 513        assert(mp->count);
 514        if (--mp->count || atomic_read(&mp->nohomeok)) {
 515                unlock_metapage(mp);
 516                spin_unlock(&meta_lock);
 517                return;
 518        }
 519
 520        if (mp->page) {
 521                /* Releasing spinlock, we have to check mp->count later */
 522                set_bit(META_stale, &mp->flag);
 523                spin_unlock(&meta_lock);
 524                kunmap(mp->page);
 525                mp->data = 0;
 526                if (test_bit(META_dirty, &mp->flag))
 527                        __write_metapage(mp);
 528                if (test_bit(META_sync, &mp->flag)) {
 529                        sync_metapage(mp);
 530                        clear_bit(META_sync, &mp->flag);
 531                }
 532
 533                if (test_bit(META_discard, &mp->flag)) {
 534                        lock_page(mp->page);
 535                        block_flushpage(mp->page, 0);
 536                        UnlockPage(mp->page);
 537                }
 538
 539                page_cache_release(mp->page);
 540                mp->page = NULL;
 541                INCREMENT(mpStat.pagefree);
 542                spin_lock(&meta_lock);
 543        }
 544
 545        if (mp->lsn) {
 546                /*
 547                 * Remove metapage from logsynclist.
 548                 */
 549                log = mp->log;
 550                LOGSYNC_LOCK(log);
 551                mp->log = 0;
 552                mp->lsn = 0;
 553                mp->clsn = 0;
 554                log->count--;
 555                list_del(&mp->synclist);
 556                LOGSYNC_UNLOCK(log);
 557        }
 558        if (mp->count) {
 559                /* Someone else is trying to get this metpage */
 560                unlock_metapage(mp);
 561                spin_unlock(&meta_lock);
 562                return;
 563        }
 564        remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
 565        spin_unlock(&meta_lock);
 566
 567        free_metapage(mp);
 568}
 569
 570void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 571{
 572        struct metapage **hash_ptr;
 573        unsigned long lblock;
 574        int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
 575        /* All callers are interested in block device's mapping */
 576        struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
 577        struct metapage *mp;
 578        struct page *page;
 579
 580        /*
 581         * First, mark metapages to discard.  They will eventually be
 582         * released, but should not be written.
 583         */
 584        for (lblock = addr; lblock < addr + len;
 585             lblock += 1 << l2BlocksPerPage) {
 586                hash_ptr = meta_hash(mapping, lblock);
 587again:
 588                spin_lock(&meta_lock);
 589                mp = search_hash(hash_ptr, mapping, lblock);
 590                if (mp) {
 591                        if (test_bit(META_stale, &mp->flag)) {
 592                                /* Racing with release_metapage */
 593                                mp->count++;
 594                                lock_metapage(mp);
 595                                spin_unlock(&meta_lock);
 596                                /* racing release_metapage should be done now */
 597                                release_metapage(mp);
 598                                goto again;
 599                        }
 600
 601                        set_bit(META_discard, &mp->flag);
 602                        spin_unlock(&meta_lock);
 603                } else {
 604                        spin_unlock(&meta_lock);
 605                        page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
 606                        if (page) {
 607                                block_flushpage(page, 0);
 608                                UnlockPage(page);
 609                                page_cache_release(page);
 610                        }
 611                }
 612        }
 613}
 614
 615#ifdef CONFIG_JFS_STATISTICS
 616int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 617                    int *eof, void *data)
 618{
 619        int len = 0;
 620        off_t begin;
 621
 622        len += sprintf(buffer,
 623                       "JFS Metapage statistics\n"
 624                       "=======================\n"
 625                       "page allocations = %d\n"
 626                       "page frees = %d\n"
 627                       "lock waits = %d\n"
 628                       "allocation waits = %d\n",
 629                       mpStat.pagealloc,
 630                       mpStat.pagefree,
 631                       mpStat.lockwait,
 632                       mpStat.allocwait);
 633
 634        begin = offset;
 635        *start = buffer + begin;
 636        len -= begin;
 637
 638        if (len > length)
 639                len = length;
 640        else
 641                *eof = 1;
 642
 643        if (len < 0)
 644                len = 0;
 645
 646        return len;
 647}
 648#endif
 649
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.