linux/fs/gfs2/glops.c
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/posix_acl.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "util.h"
#include "trans.h"
#include "dir.h"

static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
        fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
               bh, (unsigned long long)bh->b_blocknr, bh->b_state,
               bh->b_page->mapping, bh->b_page->flags);
        fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
               gl->gl_name.ln_type, gl->gl_name.ln_number,
               gfs2_glock2aspace(gl));
        gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
}

/**
 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
 * @gl: the glock
 * @fsync: set when called from fsync (not all buffers will be clean)
 *
 * None of the buffers should be dirty, locked, or pinned.
 */

static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct list_head *head = &gl->gl_ail_list;
        struct gfs2_bufdata *bd, *tmp;
        struct buffer_head *bh;
        const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);
        sector_t blocknr;

        gfs2_log_lock(sdp);
        spin_lock(&sdp->sd_ail_lock);
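        /*
         * When called from fsync, buffers that are still dirty, pinned
         * or locked are simply skipped (the log flush which follows
         * will catch them); for any other caller such a buffer means
         * the AIL is corrupt, so report it and withdraw.
         */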
        list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
                bh = bd->bd_bh;
                if (bh->b_state & b_state) {
                        if (fsync)
                                continue;
                        gfs2_ail_error(gl, bh);
                }
                blocknr = bh->b_blocknr;
                bh->b_private = NULL;
                gfs2_remove_from_ail(bd); /* drops ref on bh */

                bd->bd_bh = NULL;
                bd->bd_blkno = blocknr;

                gfs2_trans_add_revoke(sdp, bd);
        }
        BUG_ON(!fsync && atomic_read(&gl->gl_ail_count));
        spin_unlock(&sdp->sd_ail_lock);
        gfs2_log_unlock(sdp);
}


static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_trans tr;

        memset(&tr, 0, sizeof(tr));
        tr.tr_revokes = atomic_read(&gl->gl_ail_count);

        if (!tr.tr_revokes)
                return;

        /* A shortened, inline version of gfs2_trans_begin() */
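        /* One block for the log header, plus space for the revoke entries */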
        tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
        tr.tr_ip = (unsigned long)__builtin_return_address(0);
        gfs2_log_reserve(sdp, tr.tr_reserved);
        BUG_ON(current->journal_info);
        current->journal_info = &tr;

        __gfs2_ail_flush(gl, false);

        gfs2_trans_end(sdp);
        gfs2_log_flush(sdp, NULL);
}

void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;
        unsigned int revokes = atomic_read(&gl->gl_ail_count);
        int ret;

        if (!revokes)
                return;

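        /* A normal transaction this time: no blocks, one revoke per AIL buffer */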
        ret = gfs2_trans_begin(sdp, 0, revokes);
        if (ret)
                return;
        __gfs2_ail_flush(gl, fsync);
        gfs2_trans_end(sdp);
        gfs2_log_flush(sdp, NULL);
}

/**
 * rgrp_go_sync - sync out the metadata for this glock
 * @gl: the glock
 *
 * Called when demoting or unlocking an EX glock.  We must flush
 * to disk all dirty buffers/pages relating to this glock, and must
 * not return to the caller to demote/unlock the glock until the I/O
 * is complete.
 */

static void rgrp_go_sync(struct gfs2_glock *gl)
{
        struct address_space *metamapping = gfs2_glock2aspace(gl);
        struct gfs2_rgrpd *rgd;
        int error;

        if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
                return;
        BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);

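        /* Journal first, then the metadata pages, then wait for the I/O */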
        gfs2_log_flush(gl->gl_sbd, gl);
        filemap_fdatawrite(metamapping);
        error = filemap_fdatawait(metamapping);
        mapping_set_error(metamapping, error);
        gfs2_ail_empty_gl(gl);

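        /* The rgrp is now clean, so its clone bitmaps are no longer needed */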
        spin_lock(&gl->gl_spin);
        rgd = gl->gl_object;
        if (rgd)
                gfs2_free_clones(rgd);
        spin_unlock(&gl->gl_spin);
}

/**
 * rgrp_go_inval - invalidate the metadata for this glock
 * @gl: the glock
 * @flags:
 *
 * We never use LM_ST_DEFERRED with resource groups, so we should
 * always see the metadata flag set here.
 *
 */

static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
        struct address_space *mapping = gfs2_glock2aspace(gl);

        BUG_ON(!(flags & DIO_METADATA));
        gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));
        truncate_inode_pages(mapping, 0);

        if (gl->gl_object) {
                struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object;
                rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
        }
}

/**
 * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
 * @gl: the glock protecting the inode
 *
 */

static void inode_go_sync(struct gfs2_glock *gl)
{
        struct gfs2_inode *ip = gl->gl_object;
        struct address_space *metamapping = gfs2_glock2aspace(gl);
        int error;

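        /*
         * GIF_SW_PAGED means the inode has (or had) writable mmap()ed
         * pages; zap those mappings so that further writes fault and
         * go back through the glock machinery.
         */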
        if (ip && !S_ISREG(ip->i_inode.i_mode))
                ip = NULL;
        if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
                unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
        if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
                return;

        BUG_ON(gl->gl_state != LM_ST_EXCLUSIVE);

        gfs2_log_flush(gl->gl_sbd, gl);
        filemap_fdatawrite(metamapping);
        if (ip) {
                struct address_space *mapping = ip->i_inode.i_mapping;
                filemap_fdatawrite(mapping);
                error = filemap_fdatawait(mapping);
                mapping_set_error(mapping, error);
        }
        error = filemap_fdatawait(metamapping);
        mapping_set_error(metamapping, error);
        gfs2_ail_empty_gl(gl);
        /*
         * Writeback of the data mapping may cause the dirty flag to be set
         * so we have to clear it again here.
         */
        smp_mb__before_clear_bit();
        clear_bit(GLF_DIRTY, &gl->gl_flags);
}

/**
 * inode_go_inval - prepare an inode glock to be released
 * @gl: the glock
 * @flags:
 *
 * Normally we invalidate everything, but if we are moving into
 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
 * can keep hold of the metadata, since it won't have changed.
 *
 */

static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
        struct gfs2_inode *ip = gl->gl_object;

        gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count));

        if (flags & DIO_METADATA) {
                struct address_space *mapping = gfs2_glock2aspace(gl);
                truncate_inode_pages(mapping, 0);
                if (ip) {
                        set_bit(GIF_INVALID, &ip->i_flags);
                        forget_all_cached_acls(&ip->i_inode);
                        gfs2_dir_hash_inval(ip);
                }
        }

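        /*
         * The rindex inode is special: when it is invalidated, the
         * cached resource group index must be re-read, so flush the
         * log and mark the in-core copy stale.
         */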
        if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) {
                gfs2_log_flush(gl->gl_sbd, NULL);
                gl->gl_sbd->sd_rindex_uptodate = 0;
        }
        if (ip && S_ISREG(ip->i_inode.i_mode))
                truncate_inode_pages(ip->i_inode.i_mapping, 0);
}

/**
 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
 * @gl: the glock
 *
 * Returns: 1 if it's ok
 */

static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_holder *gh;

        if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
                return 0;

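        /* Hold on to the glock while more than one holder is queued */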
        if (!list_empty(&gl->gl_holders)) {
                gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
                if (gh->gh_list.next != &gl->gl_holders)
                        return 0;
        }

        return 1;
}

/**
 * gfs2_set_nlink - Set the inode's link count based on on-disk info
 * @inode: The inode in question
 * @nlink: The link count
 *
 * If the link count has hit zero, it must never be raised, whatever the
 * on-disk inode might say. When new struct inodes are created the link
 * count is set to 1, so that we can safely use this test even when reading
 * in on disk information for the first time.
 */

static void gfs2_set_nlink(struct inode *inode, u32 nlink)
{
        /*
         * We will need to review setting the nlink count here in the
         * light of the forthcoming ro bind mount work. This is a reminder
         * to do that.
         */
        if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
                if (nlink == 0)
                        clear_nlink(inode);
                else
                        set_nlink(inode, nlink);
        }
}

static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
        const struct gfs2_dinode *str = buf;
        struct timespec atime;
        u16 height, depth;

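        /* All dinode fields are big-endian on disk; sanity-check the address first */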
        if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
                goto corrupt;
        ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
        ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
        ip->i_inode.i_rdev = 0;
        switch (ip->i_inode.i_mode & S_IFMT) {
        case S_IFBLK:
        case S_IFCHR:
                ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
                                           be32_to_cpu(str->di_minor));
                break;
 322        };

        ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
        ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
        gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
        i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
        gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
        atime.tv_sec = be64_to_cpu(str->di_atime);
        atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
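        /* Only update atime if the on-disk value is more recent */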
        if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
                ip->i_inode.i_atime = atime;
        ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
        ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
        ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
        ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);

        ip->i_goal = be64_to_cpu(str->di_goal_meta);
        ip->i_generation = be64_to_cpu(str->di_generation);

        ip->i_diskflags = be32_to_cpu(str->di_flags);
        ip->i_eattr = be64_to_cpu(str->di_eattr);
        /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
        gfs2_set_inode_flags(&ip->i_inode);
        height = be16_to_cpu(str->di_height);
        if (unlikely(height > GFS2_MAX_META_HEIGHT))
                goto corrupt;
        ip->i_height = (u8)height;

        depth = be16_to_cpu(str->di_depth);
        if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
                goto corrupt;
        ip->i_depth = (u8)depth;
        ip->i_entries = be32_to_cpu(str->di_entries);

        if (S_ISREG(ip->i_inode.i_mode))
                gfs2_set_aops(&ip->i_inode);

        return 0;
corrupt:
        gfs2_consist_inode(ip);
        return -EIO;
}

/**
 * gfs2_inode_refresh - Refresh the incore copy of the dinode
 * @ip: The GFS2 inode
 *
 * Returns: errno
 */

int gfs2_inode_refresh(struct gfs2_inode *ip)
{
        struct buffer_head *dibh;
        int error;

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                return error;

        error = gfs2_dinode_in(ip, dibh->b_data);
        brelse(dibh);
        clear_bit(GIF_INVALID, &ip->i_flags);

        return error;
}

/**
 * inode_go_lock - operation done after an inode lock is locked by a process
 * @gh: the holder
 *
 * Returns: errno
 */

static int inode_go_lock(struct gfs2_holder *gh)
{
        struct gfs2_glock *gl = gh->gh_gl;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = gl->gl_object;
        int error = 0;

        if (!ip || (gh->gh_flags & GL_SKIP))
                return 0;

        if (test_bit(GIF_INVALID, &ip->i_flags)) {
                error = gfs2_inode_refresh(ip);
                if (error)
                        return error;
        }

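        /*
         * A truncate that was interrupted (GFS2_DIF_TRUNC_IN_PROG) is
         * finished off by the quota daemon, so queue the inode for it
         * and wake it up rather than granting the holder now.
         */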
        if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
            (gl->gl_state == LM_ST_EXCLUSIVE) &&
            (gh->gh_state == LM_ST_EXCLUSIVE)) {
                spin_lock(&sdp->sd_trunc_lock);
                if (list_empty(&ip->i_trunc_list))
                        list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
                spin_unlock(&sdp->sd_trunc_lock);
                wake_up(&sdp->sd_quota_wait);
                return 1;
        }

        return error;
}

/**
 * inode_go_dump - print information about an inode
 * @seq: The iterator
 * @gl: the glock
 *
 * Returns: 0 on success, -ENOBUFS when we run out of space
 */

static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{
        const struct gfs2_inode *ip = gl->gl_object;
        if (ip == NULL)
                return 0;
        gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n",
                  (unsigned long long)ip->i_no_formal_ino,
                  (unsigned long long)ip->i_no_addr,
                  IF2DT(ip->i_inode.i_mode), ip->i_flags,
                  (unsigned int)ip->i_diskflags,
                  (unsigned long long)i_size_read(&ip->i_inode));
        return 0;
}

/**
 * trans_go_sync - promote/demote the transaction glock
 * @gl: the glock
 *
 */

static void trans_go_sync(struct gfs2_glock *gl)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;

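        /*
         * Giving up the transaction glock (e.g. when freezing the
         * filesystem) requires all metadata to be synced and the log
         * to be cleanly shut down first.
         */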
        if (gl->gl_state != LM_ST_UNLOCKED &&
            test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
                gfs2_meta_syncfs(sdp);
                gfs2_log_shutdown(sdp);
        }
}

/**
 * trans_go_xmote_bh - After promoting/demoting the transaction glock
 * @gl: the glock
 * @gh: the holder
 *
 */

static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
        struct gfs2_glock *j_gl = ip->i_gl;
        struct gfs2_log_header_host head;
        int error;

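        /*
         * After reacquiring the transaction glock, re-read the journal
         * head and check that the log really was shut down cleanly
         * before reinitialising the log pointers.
         */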
        if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
                j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);

                error = gfs2_find_jhead(sdp->sd_jdesc, &head);
                if (error)
                        gfs2_consist(sdp);
                if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
                        gfs2_consist(sdp);

                /* Initialize the head of the log */
                if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
                        sdp->sd_log_sequence = head.lh_sequence + 1;
                        gfs2_log_pointers_init(sdp, head.lh_blkno);
                }
        }
        return 0;
}

/**
 * trans_go_demote_ok
 * @gl: the glock
 *
 * Always returns 0
 */

static int trans_go_demote_ok(const struct gfs2_glock *gl)
{
        return 0;
}

/**
 * iopen_go_callback - schedule the dcache entry for the inode to be deleted
 * @gl: the glock
 *
 * gl_spin lock is held while calling this
 */
static void iopen_go_callback(struct gfs2_glock *gl)
{
        struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
        struct gfs2_sbd *sdp = gl->gl_sbd;

        if (sdp->sd_vfs->s_flags & MS_RDONLY)
                return;

        if (gl->gl_demote_state == LM_ST_UNLOCKED &&
            gl->gl_state == LM_ST_SHARED && ip) {
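                /*
                 * Hold a reference for the queued delete work; drop it
                 * again if the work was already queued.
                 */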
                gfs2_glock_hold(gl);
                if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
                        gfs2_glock_put_nolock(gl);
        }
}

const struct gfs2_glock_operations gfs2_meta_glops = {
        .go_type = LM_TYPE_META,
};

const struct gfs2_glock_operations gfs2_inode_glops = {
        .go_xmote_th = inode_go_sync,
        .go_inval = inode_go_inval,
        .go_demote_ok = inode_go_demote_ok,
        .go_lock = inode_go_lock,
        .go_dump = inode_go_dump,
        .go_type = LM_TYPE_INODE,
        .go_flags = GLOF_ASPACE,
};

const struct gfs2_glock_operations gfs2_rgrp_glops = {
        .go_xmote_th = rgrp_go_sync,
        .go_inval = rgrp_go_inval,
        .go_lock = gfs2_rgrp_go_lock,
        .go_unlock = gfs2_rgrp_go_unlock,
        .go_dump = gfs2_rgrp_dump,
        .go_type = LM_TYPE_RGRP,
        .go_flags = GLOF_ASPACE,
};

const struct gfs2_glock_operations gfs2_trans_glops = {
        .go_xmote_th = trans_go_sync,
        .go_xmote_bh = trans_go_xmote_bh,
        .go_demote_ok = trans_go_demote_ok,
        .go_type = LM_TYPE_NONDISK,
};

const struct gfs2_glock_operations gfs2_iopen_glops = {
        .go_type = LM_TYPE_IOPEN,
        .go_callback = iopen_go_callback,
};

const struct gfs2_glock_operations gfs2_flock_glops = {
        .go_type = LM_TYPE_FLOCK,
};

const struct gfs2_glock_operations gfs2_nondisk_glops = {
        .go_type = LM_TYPE_NONDISK,
};

const struct gfs2_glock_operations gfs2_quota_glops = {
        .go_type = LM_TYPE_QUOTA,
};

const struct gfs2_glock_operations gfs2_journal_glops = {
        .go_type = LM_TYPE_JOURNAL,
};

const struct gfs2_glock_operations *gfs2_glops_list[] = {
        [LM_TYPE_META] = &gfs2_meta_glops,
        [LM_TYPE_INODE] = &gfs2_inode_glops,
        [LM_TYPE_RGRP] = &gfs2_rgrp_glops,
        [LM_TYPE_IOPEN] = &gfs2_iopen_glops,
        [LM_TYPE_FLOCK] = &gfs2_flock_glops,
        [LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
        [LM_TYPE_QUOTA] = &gfs2_quota_glops,
        [LM_TYPE_JOURNAL] = &gfs2_journal_glops,
};