linux/fs/ocfs2/slot_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * slot_map.c
   5 *
   6 *
   7 *
   8 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License as published by the Free Software Foundation; either
  13 * version 2 of the License, or (at your option) any later version.
  14 *
  15 * This program is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU General Public
  21 * License along with this program; if not, write to the
  22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   23 * Boston, MA 02111-1307, USA.
  24 */
  25
  26#include <linux/types.h>
  27#include <linux/slab.h>
  28#include <linux/highmem.h>
  29
  30#define MLOG_MASK_PREFIX ML_SUPER
  31#include <cluster/masklog.h>
  32
  33#include "ocfs2.h"
  34
  35#include "dlmglue.h"
  36#include "extent_map.h"
  37#include "heartbeat.h"
  38#include "inode.h"
  39#include "slot_map.h"
  40#include "super.h"
  41#include "sysfile.h"
  42
  43#include "buffer_head_io.h"
  44
  45
/*
 * In-memory copy of a single slot map entry.
 *
 * sl_valid    - set to 1 by ocfs2_set_slot() and to 0 by
 *               ocfs2_invalidate_slot().
 * sl_node_num - node number recorded by ocfs2_set_slot(); the readers
 *               in this file test sl_valid before using it.
 */
   46struct ocfs2_slot {
   47        int sl_valid;
   48        unsigned int sl_node_num;
   49};
  50
/*
 * In-memory state for the slot map system file.
 *
 * si_extended        - result of ocfs2_uses_extended_slot_map(); selects
 *                      the extended or the old on-disk layout.
 * si_slots_per_block - block size divided by the size of one on-disk
 *                      slot entry for the layout in use.
 * si_inode           - slot map system inode; released with iput() when
 *                      the structure is freed.
 * si_blocks          - number of entries in si_bh.
 * si_bh              - array of buffer heads holding the slot map blocks.
 * si_num_slots       - number of entries in si_slots (osb->max_slots at
 *                      init time).
 * si_slots           - in-memory slot array; it lives in the same
 *                      allocation, immediately after this structure.
 */
   51struct ocfs2_slot_info {
   52        int si_extended;
   53        int si_slots_per_block;
   54        struct inode *si_inode;
   55        unsigned int si_blocks;
   56        struct buffer_head **si_bh;
   57        unsigned int si_num_slots;
   58        struct ocfs2_slot *si_slots;
   59};
  60
  61
  62static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
  63                                    unsigned int node_num);
  64
  65static void ocfs2_invalidate_slot(struct ocfs2_slot_info *si,
  66                                  int slot_num)
  67{
  68        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  69        si->si_slots[slot_num].sl_valid = 0;
  70}
  71
  72static void ocfs2_set_slot(struct ocfs2_slot_info *si,
  73                           int slot_num, unsigned int node_num)
  74{
  75        BUG_ON((slot_num < 0) || (slot_num >= si->si_num_slots));
  76
  77        si->si_slots[slot_num].sl_valid = 1;
  78        si->si_slots[slot_num].sl_node_num = node_num;
  79}
  80
  81/* This version is for the extended slot map */
  82static void ocfs2_update_slot_info_extended(struct ocfs2_slot_info *si)
  83{
  84        int b, i, slotno;
  85        struct ocfs2_slot_map_extended *se;
  86
  87        slotno = 0;
  88        for (b = 0; b < si->si_blocks; b++) {
  89                se = (struct ocfs2_slot_map_extended *)si->si_bh[b]->b_data;
  90                for (i = 0;
  91                     (i < si->si_slots_per_block) &&
  92                     (slotno < si->si_num_slots);
  93                     i++, slotno++) {
  94                        if (se->se_slots[i].es_valid)
  95                                ocfs2_set_slot(si, slotno,
  96                                               le32_to_cpu(se->se_slots[i].es_node_num));
  97                        else
  98                                ocfs2_invalidate_slot(si, slotno);
  99                }
 100        }
 101}
 102
 103/*
 104 * Post the slot information on disk into our slot_info struct.
 105 * Must be protected by osb_lock.
 106 */
 107static void ocfs2_update_slot_info_old(struct ocfs2_slot_info *si)
 108{
 109        int i;
 110        struct ocfs2_slot_map *sm;
 111
 112        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 113
 114        for (i = 0; i < si->si_num_slots; i++) {
 115                if (le16_to_cpu(sm->sm_slots[i]) == (u16)OCFS2_INVALID_SLOT)
 116                        ocfs2_invalidate_slot(si, i);
 117                else
 118                        ocfs2_set_slot(si, i, le16_to_cpu(sm->sm_slots[i]));
 119        }
 120}
 121
 122static void ocfs2_update_slot_info(struct ocfs2_slot_info *si)
 123{
 124        /*
 125         * The slot data will have been refreshed when ocfs2_super_lock
 126         * was taken.
 127         */
 128        if (si->si_extended)
 129                ocfs2_update_slot_info_extended(si);
 130        else
 131                ocfs2_update_slot_info_old(si);
 132}
 133
 134int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
 135{
 136        int ret;
 137        struct ocfs2_slot_info *si = osb->slot_info;
 138
 139        if (si == NULL)
 140                return 0;
 141
 142        BUG_ON(si->si_blocks == 0);
 143        BUG_ON(si->si_bh == NULL);
 144
 145        mlog(0, "Refreshing slot map, reading %u block(s)\n",
 146             si->si_blocks);
 147
 148        /*
 149         * We pass -1 as blocknr because we expect all of si->si_bh to
 150         * be !NULL.  Thus, ocfs2_read_blocks() will ignore blocknr.  If
 151         * this is not true, the read of -1 (UINT64_MAX) will fail.
 152         */
 153        ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
 154                                si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
 155        if (ret == 0) {
 156                spin_lock(&osb->osb_lock);
 157                ocfs2_update_slot_info(si);
 158                spin_unlock(&osb->osb_lock);
 159        }
 160
 161        return ret;
 162}
 163
 164/* post the our slot info stuff into it's destination bh and write it
 165 * out. */
 166static void ocfs2_update_disk_slot_extended(struct ocfs2_slot_info *si,
 167                                            int slot_num,
 168                                            struct buffer_head **bh)
 169{
 170        int blkind = slot_num / si->si_slots_per_block;
 171        int slotno = slot_num % si->si_slots_per_block;
 172        struct ocfs2_slot_map_extended *se;
 173
 174        BUG_ON(blkind >= si->si_blocks);
 175
 176        se = (struct ocfs2_slot_map_extended *)si->si_bh[blkind]->b_data;
 177        se->se_slots[slotno].es_valid = si->si_slots[slot_num].sl_valid;
 178        if (si->si_slots[slot_num].sl_valid)
 179                se->se_slots[slotno].es_node_num =
 180                        cpu_to_le32(si->si_slots[slot_num].sl_node_num);
 181        *bh = si->si_bh[blkind];
 182}
 183
 184static void ocfs2_update_disk_slot_old(struct ocfs2_slot_info *si,
 185                                       int slot_num,
 186                                       struct buffer_head **bh)
 187{
 188        int i;
 189        struct ocfs2_slot_map *sm;
 190
 191        sm = (struct ocfs2_slot_map *)si->si_bh[0]->b_data;
 192        for (i = 0; i < si->si_num_slots; i++) {
 193                if (si->si_slots[i].sl_valid)
 194                        sm->sm_slots[i] =
 195                                cpu_to_le16(si->si_slots[i].sl_node_num);
 196                else
 197                        sm->sm_slots[i] = cpu_to_le16(OCFS2_INVALID_SLOT);
 198        }
 199        *bh = si->si_bh[0];
 200}
 201
 202static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
 203                                  struct ocfs2_slot_info *si,
 204                                  int slot_num)
 205{
 206        int status;
 207        struct buffer_head *bh;
 208
 209        spin_lock(&osb->osb_lock);
 210        if (si->si_extended)
 211                ocfs2_update_disk_slot_extended(si, slot_num, &bh);
 212        else
 213                ocfs2_update_disk_slot_old(si, slot_num, &bh);
 214        spin_unlock(&osb->osb_lock);
 215
 216        status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
 217        if (status < 0)
 218                mlog_errno(status);
 219
 220        return status;
 221}
 222
 223/*
 224 * Calculate how many bytes are needed by the slot map.  Returns
 225 * an error if the slot map file is too small.
 226 */
 227static int ocfs2_slot_map_physical_size(struct ocfs2_super *osb,
 228                                        struct inode *inode,
 229                                        unsigned long long *bytes)
 230{
 231        unsigned long long bytes_needed;
 232
 233        if (ocfs2_uses_extended_slot_map(osb)) {
 234                bytes_needed = osb->max_slots *
 235                        sizeof(struct ocfs2_extended_slot);
 236        } else {
 237                bytes_needed = osb->max_slots * sizeof(__le16);
 238        }
 239        if (bytes_needed > i_size_read(inode)) {
 240                mlog(ML_ERROR,
 241                     "Slot map file is too small!  (size %llu, needed %llu)\n",
 242                     i_size_read(inode), bytes_needed);
 243                return -ENOSPC;
 244        }
 245
 246        *bytes = bytes_needed;
 247        return 0;
 248}
 249
 250/* try to find global node in the slot info. Returns -ENOENT
 251 * if nothing is found. */
 252static int __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
 253                                    unsigned int node_num)
 254{
 255        int i, ret = -ENOENT;
 256
 257        for(i = 0; i < si->si_num_slots; i++) {
 258                if (si->si_slots[i].sl_valid &&
 259                    (node_num == si->si_slots[i].sl_node_num)) {
 260                        ret = i;
 261                        break;
 262                }
 263        }
 264
 265        return ret;
 266}
 267
 268static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
 269                                   int preferred)
 270{
 271        int i, ret = -ENOSPC;
 272
 273        if ((preferred >= 0) && (preferred < si->si_num_slots)) {
 274                if (!si->si_slots[preferred].sl_valid) {
 275                        ret = preferred;
 276                        goto out;
 277                }
 278        }
 279
 280        for(i = 0; i < si->si_num_slots; i++) {
 281                if (!si->si_slots[i].sl_valid) {
 282                        ret = i;
 283                        break;
 284                }
 285        }
 286out:
 287        return ret;
 288}
 289
 290int ocfs2_node_num_to_slot(struct ocfs2_super *osb, unsigned int node_num)
 291{
 292        int slot;
 293        struct ocfs2_slot_info *si = osb->slot_info;
 294
 295        spin_lock(&osb->osb_lock);
 296        slot = __ocfs2_node_num_to_slot(si, node_num);
 297        spin_unlock(&osb->osb_lock);
 298
 299        return slot;
 300}
 301
 302int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 303                                  unsigned int *node_num)
 304{
 305        struct ocfs2_slot_info *si = osb->slot_info;
 306
 307        assert_spin_locked(&osb->osb_lock);
 308
 309        BUG_ON(slot_num < 0);
 310        BUG_ON(slot_num > osb->max_slots);
 311
 312        if (!si->si_slots[slot_num].sl_valid)
 313                return -ENOENT;
 314
 315        *node_num = si->si_slots[slot_num].sl_node_num;
 316        return 0;
 317}
 318
 319static void __ocfs2_free_slot_info(struct ocfs2_slot_info *si)
 320{
 321        unsigned int i;
 322
 323        if (si == NULL)
 324                return;
 325
 326        if (si->si_inode)
 327                iput(si->si_inode);
 328        if (si->si_bh) {
 329                for (i = 0; i < si->si_blocks; i++) {
 330                        if (si->si_bh[i]) {
 331                                brelse(si->si_bh[i]);
 332                                si->si_bh[i] = NULL;
 333                        }
 334                }
 335                kfree(si->si_bh);
 336        }
 337
 338        kfree(si);
 339}
 340
 341int ocfs2_clear_slot(struct ocfs2_super *osb, int slot_num)
 342{
 343        struct ocfs2_slot_info *si = osb->slot_info;
 344
 345        if (si == NULL)
 346                return 0;
 347
 348        spin_lock(&osb->osb_lock);
 349        ocfs2_invalidate_slot(si, slot_num);
 350        spin_unlock(&osb->osb_lock);
 351
 352        return ocfs2_update_disk_slot(osb, osb->slot_info, slot_num);
 353}
 354
 355static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
 356                                  struct ocfs2_slot_info *si)
 357{
 358        int status = 0;
 359        u64 blkno;
 360        unsigned long long blocks, bytes;
 361        unsigned int i;
 362        struct buffer_head *bh;
 363
 364        status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes);
 365        if (status)
 366                goto bail;
 367
 368        blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes);
 369        BUG_ON(blocks > UINT_MAX);
 370        si->si_blocks = blocks;
 371        if (!si->si_blocks)
 372                goto bail;
 373
 374        if (si->si_extended)
 375                si->si_slots_per_block =
 376                        (osb->sb->s_blocksize /
 377                         sizeof(struct ocfs2_extended_slot));
 378        else
 379                si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16);
 380
 381        /* The size checks above should ensure this */
 382        BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks);
 383
 384        mlog(0, "Slot map needs %u buffers for %llu bytes\n",
 385             si->si_blocks, bytes);
 386
 387        si->si_bh = kzalloc(sizeof(struct buffer_head *) * si->si_blocks,
 388                            GFP_KERNEL);
 389        if (!si->si_bh) {
 390                status = -ENOMEM;
 391                mlog_errno(status);
 392                goto bail;
 393        }
 394
 395        for (i = 0; i < si->si_blocks; i++) {
 396                status = ocfs2_extent_map_get_blocks(si->si_inode, i,
 397                                                     &blkno, NULL, NULL);
 398                if (status < 0) {
 399                        mlog_errno(status);
 400                        goto bail;
 401                }
 402
 403                mlog(0, "Reading slot map block %u at %llu\n", i,
 404                     (unsigned long long)blkno);
 405
 406                bh = NULL;  /* Acquire a fresh bh */
 407                status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
 408                                           1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
 409                if (status < 0) {
 410                        mlog_errno(status);
 411                        goto bail;
 412                }
 413
 414                si->si_bh[i] = bh;
 415        }
 416
 417bail:
 418        return status;
 419}
 420
 421int ocfs2_init_slot_info(struct ocfs2_super *osb)
 422{
 423        int status;
 424        struct inode *inode = NULL;
 425        struct ocfs2_slot_info *si;
 426
 427        si = kzalloc(sizeof(struct ocfs2_slot_info) +
 428                     (sizeof(struct ocfs2_slot) * osb->max_slots),
 429                     GFP_KERNEL);
 430        if (!si) {
 431                status = -ENOMEM;
 432                mlog_errno(status);
 433                goto bail;
 434        }
 435
 436        si->si_extended = ocfs2_uses_extended_slot_map(osb);
 437        si->si_num_slots = osb->max_slots;
 438        si->si_slots = (struct ocfs2_slot *)((char *)si +
 439                                             sizeof(struct ocfs2_slot_info));
 440
 441        inode = ocfs2_get_system_file_inode(osb, SLOT_MAP_SYSTEM_INODE,
 442                                            OCFS2_INVALID_SLOT);
 443        if (!inode) {
 444                status = -EINVAL;
 445                mlog_errno(status);
 446                goto bail;
 447        }
 448
 449        si->si_inode = inode;
 450        status = ocfs2_map_slot_buffers(osb, si);
 451        if (status < 0) {
 452                mlog_errno(status);
 453                goto bail;
 454        }
 455
 456        osb->slot_info = (struct ocfs2_slot_info *)si;
 457bail:
 458        if (status < 0 && si)
 459                __ocfs2_free_slot_info(si);
 460
 461        return status;
 462}
 463
 464void ocfs2_free_slot_info(struct ocfs2_super *osb)
 465{
 466        struct ocfs2_slot_info *si = osb->slot_info;
 467
 468        osb->slot_info = NULL;
 469        __ocfs2_free_slot_info(si);
 470}
 471
 472int ocfs2_find_slot(struct ocfs2_super *osb)
 473{
 474        int status;
 475        int slot;
 476        struct ocfs2_slot_info *si;
 477
 478        mlog_entry_void();
 479
 480        si = osb->slot_info;
 481
 482        spin_lock(&osb->osb_lock);
 483        ocfs2_update_slot_info(si);
 484
 485        /* search for ourselves first and take the slot if it already
 486         * exists. Perhaps we need to mark this in a variable for our
 487         * own journal recovery? Possibly not, though we certainly
 488         * need to warn to the user */
 489        slot = __ocfs2_node_num_to_slot(si, osb->node_num);
 490        if (slot < 0) {
 491                /* if no slot yet, then just take 1st available
 492                 * one. */
 493                slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
 494                if (slot < 0) {
 495                        spin_unlock(&osb->osb_lock);
 496                        mlog(ML_ERROR, "no free slots available!\n");
 497                        status = -EINVAL;
 498                        goto bail;
 499                }
 500        } else
 501                mlog(ML_NOTICE, "slot %d is already allocated to this node!\n",
 502                     slot);
 503
 504        ocfs2_set_slot(si, slot, osb->node_num);
 505        osb->slot_num = slot;
 506        spin_unlock(&osb->osb_lock);
 507
 508        mlog(0, "taking node slot %d\n", osb->slot_num);
 509
 510        status = ocfs2_update_disk_slot(osb, si, osb->slot_num);
 511        if (status < 0)
 512                mlog_errno(status);
 513
 514bail:
 515        mlog_exit(status);
 516        return status;
 517}
 518
 519void ocfs2_put_slot(struct ocfs2_super *osb)
 520{
 521        int status, slot_num;
 522        struct ocfs2_slot_info *si = osb->slot_info;
 523
 524        if (!si)
 525                return;
 526
 527        spin_lock(&osb->osb_lock);
 528        ocfs2_update_slot_info(si);
 529
 530        slot_num = osb->slot_num;
 531        ocfs2_invalidate_slot(si, osb->slot_num);
 532        osb->slot_num = OCFS2_INVALID_SLOT;
 533        spin_unlock(&osb->osb_lock);
 534
 535        status = ocfs2_update_disk_slot(osb, si, slot_num);
 536        if (status < 0) {
 537                mlog_errno(status);
 538                goto bail;
 539        }
 540
 541bail:
 542        ocfs2_free_slot_info(osb);
 543}
 544
 545
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.