linux/fs/ocfs2/extent_map.c
<<
>>
Prefs
   1/* -*- mode: c; c-basic-offset: 8; -*-
   2 * vim: noexpandtab sw=8 ts=8 sts=0:
   3 *
   4 * extent_map.c
   5 *
   6 * Block/Cluster mapping functions
   7 *
   8 * Copyright (C) 2004 Oracle.  All rights reserved.
   9 *
  10 * This program is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU General Public
  12 * License, version 2,  as published by the Free Software Foundation.
  13 *
  14 * This program is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * General Public License for more details.
  18 *
  19 * You should have received a copy of the GNU General Public
  20 * License along with this program; if not, write to the
  21 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  22 * Boston, MA 021110-1307, USA.
  23 */
  24
  25#include <linux/fs.h>
  26#include <linux/init.h>
  27#include <linux/slab.h>
  28#include <linux/types.h>
  29#include <linux/fiemap.h>
  30
  31#include <cluster/masklog.h>
  32
  33#include "ocfs2.h"
  34
  35#include "alloc.h"
  36#include "dlmglue.h"
  37#include "extent_map.h"
  38#include "inode.h"
  39#include "super.h"
  40#include "symlink.h"
  41#include "ocfs2_trace.h"
  42
  43#include "buffer_head_io.h"
  44
  45/*
  46 * The extent caching implementation is intentionally trivial.
  47 *
  48 * We only cache a small number of extents stored directly on the
  49 * inode, so linear order operations are acceptable. If we ever want
  50 * to increase the size of the extent map, then these algorithms must
  51 * get smarter.
  52 */
  53
  54void ocfs2_extent_map_init(struct inode *inode)
  55{
  56        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  57
  58        oi->ip_extent_map.em_num_items = 0;
  59        INIT_LIST_HEAD(&oi->ip_extent_map.em_list);
  60}
  61
  62static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em,
  63                                      unsigned int cpos,
  64                                      struct ocfs2_extent_map_item **ret_emi)
  65{
  66        unsigned int range;
  67        struct ocfs2_extent_map_item *emi;
  68
  69        *ret_emi = NULL;
  70
  71        list_for_each_entry(emi, &em->em_list, ei_list) {
  72                range = emi->ei_cpos + emi->ei_clusters;
  73
  74                if (cpos >= emi->ei_cpos && cpos < range) {
  75                        list_move(&emi->ei_list, &em->em_list);
  76
  77                        *ret_emi = emi;
  78                        break;
  79                }
  80        }
  81}
  82
  83static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
  84                                   unsigned int *phys, unsigned int *len,
  85                                   unsigned int *flags)
  86{
  87        unsigned int coff;
  88        struct ocfs2_inode_info *oi = OCFS2_I(inode);
  89        struct ocfs2_extent_map_item *emi;
  90
  91        spin_lock(&oi->ip_lock);
  92
  93        __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi);
  94        if (emi) {
  95                coff = cpos - emi->ei_cpos;
  96                *phys = emi->ei_phys + coff;
  97                if (len)
  98                        *len = emi->ei_clusters - coff;
  99                if (flags)
 100                        *flags = emi->ei_flags;
 101        }
 102
 103        spin_unlock(&oi->ip_lock);
 104
 105        if (emi == NULL)
 106                return -ENOENT;
 107
 108        return 0;
 109}
 110
 111/*
 112 * Forget about all clusters equal to or greater than cpos.
 113 */
 114void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
 115{
 116        struct ocfs2_extent_map_item *emi, *n;
 117        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 118        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 119        LIST_HEAD(tmp_list);
 120        unsigned int range;
 121
 122        spin_lock(&oi->ip_lock);
 123        list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
 124                if (emi->ei_cpos >= cpos) {
 125                        /* Full truncate of this record. */
 126                        list_move(&emi->ei_list, &tmp_list);
 127                        BUG_ON(em->em_num_items == 0);
 128                        em->em_num_items--;
 129                        continue;
 130                }
 131
 132                range = emi->ei_cpos + emi->ei_clusters;
 133                if (range > cpos) {
 134                        /* Partial truncate */
 135                        emi->ei_clusters = cpos - emi->ei_cpos;
 136                }
 137        }
 138        spin_unlock(&oi->ip_lock);
 139
 140        list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
 141                list_del(&emi->ei_list);
 142                kfree(emi);
 143        }
 144}
 145
 146/*
 147 * Is any part of emi2 contained within emi1
 148 */
 149static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1,
 150                                 struct ocfs2_extent_map_item *emi2)
 151{
 152        unsigned int range1, range2;
 153
 154        /*
 155         * Check if logical start of emi2 is inside emi1
 156         */
 157        range1 = emi1->ei_cpos + emi1->ei_clusters;
 158        if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1)
 159                return 1;
 160
 161        /*
 162         * Check if logical end of emi2 is inside emi1
 163         */
 164        range2 = emi2->ei_cpos + emi2->ei_clusters;
 165        if (range2 > emi1->ei_cpos && range2 <= range1)
 166                return 1;
 167
 168        return 0;
 169}
 170
 171static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest,
 172                                  struct ocfs2_extent_map_item *src)
 173{
 174        dest->ei_cpos = src->ei_cpos;
 175        dest->ei_phys = src->ei_phys;
 176        dest->ei_clusters = src->ei_clusters;
 177        dest->ei_flags = src->ei_flags;
 178}
 179
 180/*
 181 * Try to merge emi with ins. Returns 1 if merge succeeds, zero
 182 * otherwise.
 183 */
 184static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
 185                                         struct ocfs2_extent_map_item *ins)
 186{
 187        /*
 188         * Handle contiguousness
 189         */
 190        if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) &&
 191            ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) &&
 192            ins->ei_flags == emi->ei_flags) {
 193                emi->ei_clusters += ins->ei_clusters;
 194                return 1;
 195        } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
 196                   (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
 197                   ins->ei_flags == emi->ei_flags) {
 198                emi->ei_phys = ins->ei_phys;
 199                emi->ei_cpos = ins->ei_cpos;
 200                emi->ei_clusters += ins->ei_clusters;
 201                return 1;
 202        }
 203
 204        /*
 205         * Overlapping extents - this shouldn't happen unless we've
 206         * split an extent to change it's flags. That is exceedingly
 207         * rare, so there's no sense in trying to optimize it yet.
 208         */
 209        if (ocfs2_ei_is_contained(emi, ins) ||
 210            ocfs2_ei_is_contained(ins, emi)) {
 211                ocfs2_copy_emi_fields(emi, ins);
 212                return 1;
 213        }
 214
 215        /* No merge was possible. */
 216        return 0;
 217}
 218
 219/*
 220 * In order to reduce complexity on the caller, this insert function
 221 * is intentionally liberal in what it will accept.
 222 *
 223 * The only rule is that the truncate call *must* be used whenever
 224 * records have been deleted. This avoids inserting overlapping
 225 * records with different physical mappings.
 226 */
 227void ocfs2_extent_map_insert_rec(struct inode *inode,
 228                                 struct ocfs2_extent_rec *rec)
 229{
 230        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 231        struct ocfs2_extent_map *em = &oi->ip_extent_map;
 232        struct ocfs2_extent_map_item *emi, *new_emi = NULL;
 233        struct ocfs2_extent_map_item ins;
 234
 235        ins.ei_cpos = le32_to_cpu(rec->e_cpos);
 236        ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb,
 237                                               le64_to_cpu(rec->e_blkno));
 238        ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters);
 239        ins.ei_flags = rec->e_flags;
 240
 241search:
 242        spin_lock(&oi->ip_lock);
 243
 244        list_for_each_entry(emi, &em->em_list, ei_list) {
 245                if (ocfs2_try_to_merge_extent_map(emi, &ins)) {
 246                        list_move(&emi->ei_list, &em->em_list);
 247                        spin_unlock(&oi->ip_lock);
 248                        goto out;
 249                }
 250        }
 251
 252        /*
 253         * No item could be merged.
 254         *
 255         * Either allocate and add a new item, or overwrite the last recently
 256         * inserted.
 257         */
 258
 259        if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) {
 260                if (new_emi == NULL) {
 261                        spin_unlock(&oi->ip_lock);
 262
 263                        new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS);
 264                        if (new_emi == NULL)
 265                                goto out;
 266
 267                        goto search;
 268                }
 269
 270                ocfs2_copy_emi_fields(new_emi, &ins);
 271                list_add(&new_emi->ei_list, &em->em_list);
 272                em->em_num_items++;
 273                new_emi = NULL;
 274        } else {
 275                BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0);
 276                emi = list_entry(em->em_list.prev,
 277                                 struct ocfs2_extent_map_item, ei_list);
 278                list_move(&emi->ei_list, &em->em_list);
 279                ocfs2_copy_emi_fields(emi, &ins);
 280        }
 281
 282        spin_unlock(&oi->ip_lock);
 283
 284out:
 285        if (new_emi)
 286                kfree(new_emi);
 287}
 288
 289static int ocfs2_last_eb_is_empty(struct inode *inode,
 290                                  struct ocfs2_dinode *di)
 291{
 292        int ret, next_free;
 293        u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk);
 294        struct buffer_head *eb_bh = NULL;
 295        struct ocfs2_extent_block *eb;
 296        struct ocfs2_extent_list *el;
 297
 298        ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
 299        if (ret) {
 300                mlog_errno(ret);
 301                goto out;
 302        }
 303
 304        eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 305        el = &eb->h_list;
 306
 307        if (el->l_tree_depth) {
 308                ocfs2_error(inode->i_sb,
 309                            "Inode %lu has non zero tree depth in "
 310                            "leaf block %llu\n", inode->i_ino,
 311                            (unsigned long long)eb_bh->b_blocknr);
 312                ret = -EROFS;
 313                goto out;
 314        }
 315
 316        next_free = le16_to_cpu(el->l_next_free_rec);
 317
 318        if (next_free == 0 ||
 319            (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0])))
 320                ret = 1;
 321
 322out:
 323        brelse(eb_bh);
 324        return ret;
 325}
 326
 327/*
 328 * Return the 1st index within el which contains an extent start
 329 * larger than v_cluster.
 330 */
 331static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
 332                                       u32 v_cluster)
 333{
 334        int i;
 335        struct ocfs2_extent_rec *rec;
 336
 337        for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
 338                rec = &el->l_recs[i];
 339
 340                if (v_cluster < le32_to_cpu(rec->e_cpos))
 341                        break;
 342        }
 343
 344        return i;
 345}
 346
 347/*
 348 * Figure out the size of a hole which starts at v_cluster within the given
 349 * extent list.
 350 *
 351 * If there is no more allocation past v_cluster, we return the maximum
 352 * cluster size minus v_cluster.
 353 *
 354 * If we have in-inode extents, then el points to the dinode list and
 355 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
 356 * containing el.
 357 */
 358int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
 359                               struct ocfs2_extent_list *el,
 360                               struct buffer_head *eb_bh,
 361                               u32 v_cluster,
 362                               u32 *num_clusters)
 363{
 364        int ret, i;
 365        struct buffer_head *next_eb_bh = NULL;
 366        struct ocfs2_extent_block *eb, *next_eb;
 367
 368        i = ocfs2_search_for_hole_index(el, v_cluster);
 369
 370        if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) {
 371                eb = (struct ocfs2_extent_block *)eb_bh->b_data;
 372
 373                /*
 374                 * Check the next leaf for any extents.
 375                 */
 376
 377                if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
 378                        goto no_more_extents;
 379
 380                ret = ocfs2_read_extent_block(ci,
 381                                              le64_to_cpu(eb->h_next_leaf_blk),
 382                                              &next_eb_bh);
 383                if (ret) {
 384                        mlog_errno(ret);
 385                        goto out;
 386                }
 387
 388                next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
 389                el = &next_eb->h_list;
 390                i = ocfs2_search_for_hole_index(el, v_cluster);
 391        }
 392
 393no_more_extents:
 394        if (i == le16_to_cpu(el->l_next_free_rec)) {
 395                /*
 396                 * We're at the end of our existing allocation. Just
 397                 * return the maximum number of clusters we could
 398                 * possibly allocate.
 399                 */
 400                *num_clusters = UINT_MAX - v_cluster;
 401        } else {
 402                *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster;
 403        }
 404
 405        ret = 0;
 406out:
 407        brelse(next_eb_bh);
 408        return ret;
 409}
 410
 411static int ocfs2_get_clusters_nocache(struct inode *inode,
 412                                      struct buffer_head *di_bh,
 413                                      u32 v_cluster, unsigned int *hole_len,
 414                                      struct ocfs2_extent_rec *ret_rec,
 415                                      unsigned int *is_last)
 416{
 417        int i, ret, tree_height, len;
 418        struct ocfs2_dinode *di;
 419        struct ocfs2_extent_block *uninitialized_var(eb);
 420        struct ocfs2_extent_list *el;
 421        struct ocfs2_extent_rec *rec;
 422        struct buffer_head *eb_bh = NULL;
 423
 424        memset(ret_rec, 0, sizeof(*ret_rec));
 425        if (is_last)
 426                *is_last = 0;
 427
 428        di = (struct ocfs2_dinode *) di_bh->b_data;
 429        el = &di->id2.i_list;
 430        tree_height = le16_to_cpu(el->l_tree_depth);
 431
 432        if (tree_height > 0) {
 433                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 434                                      &eb_bh);
 435                if (ret) {
 436                        mlog_errno(ret);
 437                        goto out;
 438                }
 439
 440                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 441                el = &eb->h_list;
 442
 443                if (el->l_tree_depth) {
 444                        ocfs2_error(inode->i_sb,
 445                                    "Inode %lu has non zero tree depth in "
 446                                    "leaf block %llu\n", inode->i_ino,
 447                                    (unsigned long long)eb_bh->b_blocknr);
 448                        ret = -EROFS;
 449                        goto out;
 450                }
 451        }
 452
 453        i = ocfs2_search_extent_list(el, v_cluster);
 454        if (i == -1) {
 455                /*
 456                 * Holes can be larger than the maximum size of an
 457                 * extent, so we return their lengths in a separate
 458                 * field.
 459                 */
 460                if (hole_len) {
 461                        ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
 462                                                         el, eb_bh,
 463                                                         v_cluster, &len);
 464                        if (ret) {
 465                                mlog_errno(ret);
 466                                goto out;
 467                        }
 468
 469                        *hole_len = len;
 470                }
 471                goto out_hole;
 472        }
 473
 474        rec = &el->l_recs[i];
 475
 476        BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 477
 478        if (!rec->e_blkno) {
 479                ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
 480                            "record (%u, %u, 0)", inode->i_ino,
 481                            le32_to_cpu(rec->e_cpos),
 482                            ocfs2_rec_clusters(el, rec));
 483                ret = -EROFS;
 484                goto out;
 485        }
 486
 487        *ret_rec = *rec;
 488
 489        /*
 490         * Checking for last extent is potentially expensive - we
 491         * might have to look at the next leaf over to see if it's
 492         * empty.
 493         *
 494         * The first two checks are to see whether the caller even
 495         * cares for this information, and if the extent is at least
 496         * the last in it's list.
 497         *
 498         * If those hold true, then the extent is last if any of the
 499         * additional conditions hold true:
 500         *  - Extent list is in-inode
 501         *  - Extent list is right-most
 502         *  - Extent list is 2nd to rightmost, with empty right-most
 503         */
 504        if (is_last) {
 505                if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) {
 506                        if (tree_height == 0)
 507                                *is_last = 1;
 508                        else if (eb->h_blkno == di->i_last_eb_blk)
 509                                *is_last = 1;
 510                        else if (eb->h_next_leaf_blk == di->i_last_eb_blk) {
 511                                ret = ocfs2_last_eb_is_empty(inode, di);
 512                                if (ret < 0) {
 513                                        mlog_errno(ret);
 514                                        goto out;
 515                                }
 516                                if (ret == 1)
 517                                        *is_last = 1;
 518                        }
 519                }
 520        }
 521
 522out_hole:
 523        ret = 0;
 524out:
 525        brelse(eb_bh);
 526        return ret;
 527}
 528
 529static void ocfs2_relative_extent_offsets(struct super_block *sb,
 530                                          u32 v_cluster,
 531                                          struct ocfs2_extent_rec *rec,
 532                                          u32 *p_cluster, u32 *num_clusters)
 533
 534{
 535        u32 coff = v_cluster - le32_to_cpu(rec->e_cpos);
 536
 537        *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno));
 538        *p_cluster = *p_cluster + coff;
 539
 540        if (num_clusters)
 541                *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
 542}
 543
 544int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
 545                             u32 *p_cluster, u32 *num_clusters,
 546                             struct ocfs2_extent_list *el,
 547                             unsigned int *extent_flags)
 548{
 549        int ret = 0, i;
 550        struct buffer_head *eb_bh = NULL;
 551        struct ocfs2_extent_block *eb;
 552        struct ocfs2_extent_rec *rec;
 553        u32 coff;
 554
 555        if (el->l_tree_depth) {
 556                ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
 557                                      &eb_bh);
 558                if (ret) {
 559                        mlog_errno(ret);
 560                        goto out;
 561                }
 562
 563                eb = (struct ocfs2_extent_block *) eb_bh->b_data;
 564                el = &eb->h_list;
 565
 566                if (el->l_tree_depth) {
 567                        ocfs2_error(inode->i_sb,
 568                                    "Inode %lu has non zero tree depth in "
 569                                    "xattr leaf block %llu\n", inode->i_ino,
 570                                    (unsigned long long)eb_bh->b_blocknr);
 571                        ret = -EROFS;
 572                        goto out;
 573                }
 574        }
 575
 576        i = ocfs2_search_extent_list(el, v_cluster);
 577        if (i == -1) {
 578                ret = -EROFS;
 579                mlog_errno(ret);
 580                goto out;
 581        } else {
 582                rec = &el->l_recs[i];
 583                BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
 584
 585                if (!rec->e_blkno) {
 586                        ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
 587                                    "record (%u, %u, 0) in xattr", inode->i_ino,
 588                                    le32_to_cpu(rec->e_cpos),
 589                                    ocfs2_rec_clusters(el, rec));
 590                        ret = -EROFS;
 591                        goto out;
 592                }
 593                coff = v_cluster - le32_to_cpu(rec->e_cpos);
 594                *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
 595                                                    le64_to_cpu(rec->e_blkno));
 596                *p_cluster = *p_cluster + coff;
 597                if (num_clusters)
 598                        *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
 599
 600                if (extent_flags)
 601                        *extent_flags = rec->e_flags;
 602        }
 603out:
 604        if (eb_bh)
 605                brelse(eb_bh);
 606        return ret;
 607}
 608
 609int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
 610                       u32 *p_cluster, u32 *num_clusters,
 611                       unsigned int *extent_flags)
 612{
 613        int ret;
 614        unsigned int uninitialized_var(hole_len), flags = 0;
 615        struct buffer_head *di_bh = NULL;
 616        struct ocfs2_extent_rec rec;
 617
 618        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 619                ret = -ERANGE;
 620                mlog_errno(ret);
 621                goto out;
 622        }
 623
 624        ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
 625                                      num_clusters, extent_flags);
 626        if (ret == 0)
 627                goto out;
 628
 629        ret = ocfs2_read_inode_block(inode, &di_bh);
 630        if (ret) {
 631                mlog_errno(ret);
 632                goto out;
 633        }
 634
 635        ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len,
 636                                         &rec, NULL);
 637        if (ret) {
 638                mlog_errno(ret);
 639                goto out;
 640        }
 641
 642        if (rec.e_blkno == 0ULL) {
 643                /*
 644                 * A hole was found. Return some canned values that
 645                 * callers can key on. If asked for, num_clusters will
 646                 * be populated with the size of the hole.
 647                 */
 648                *p_cluster = 0;
 649                if (num_clusters) {
 650                        *num_clusters = hole_len;
 651                }
 652        } else {
 653                ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec,
 654                                              p_cluster, num_clusters);
 655                flags = rec.e_flags;
 656
 657                ocfs2_extent_map_insert_rec(inode, &rec);
 658        }
 659
 660        if (extent_flags)
 661                *extent_flags = flags;
 662
 663out:
 664        brelse(di_bh);
 665        return ret;
 666}
 667
 668/*
 669 * This expects alloc_sem to be held. The allocation cannot change at
 670 * all while the map is in the process of being updated.
 671 */
 672int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
 673                                u64 *ret_count, unsigned int *extent_flags)
 674{
 675        int ret;
 676        int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
 677        u32 cpos, num_clusters, p_cluster;
 678        u64 boff = 0;
 679
 680        cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
 681
 682        ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters,
 683                                 extent_flags);
 684        if (ret) {
 685                mlog_errno(ret);
 686                goto out;
 687        }
 688
 689        /*
 690         * p_cluster == 0 indicates a hole.
 691         */
 692        if (p_cluster) {
 693                boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
 694                boff += (v_blkno & (u64)(bpc - 1));
 695        }
 696
 697        *p_blkno = boff;
 698
 699        if (ret_count) {
 700                *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
 701                *ret_count -= v_blkno & (u64)(bpc - 1);
 702        }
 703
 704out:
 705        return ret;
 706}
 707
 708/*
 709 * The ocfs2_fiemap_inline() may be a little bit misleading, since
 710 * it not only handles the fiemap for inlined files, but also deals
 711 * with the fast symlink, cause they have no difference for extent
 712 * mapping per se.
 713 */
 714static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
 715                               struct fiemap_extent_info *fieinfo,
 716                               u64 map_start)
 717{
 718        int ret;
 719        unsigned int id_count;
 720        struct ocfs2_dinode *di;
 721        u64 phys;
 722        u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
 723        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 724
 725        di = (struct ocfs2_dinode *)di_bh->b_data;
 726        if (ocfs2_inode_is_fast_symlink(inode))
 727                id_count = ocfs2_fast_symlink_chars(inode->i_sb);
 728        else
 729                id_count = le16_to_cpu(di->id2.i_data.id_count);
 730
 731        if (map_start < id_count) {
 732                phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
 733                if (ocfs2_inode_is_fast_symlink(inode))
 734                        phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
 735                else
 736                        phys += offsetof(struct ocfs2_dinode,
 737                                         id2.i_data.id_data);
 738
 739                ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
 740                                              flags);
 741                if (ret < 0)
 742                        return ret;
 743        }
 744
 745        return 0;
 746}
 747
 748#define OCFS2_FIEMAP_FLAGS      (FIEMAP_FLAG_SYNC)
 749
 750int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 751                 u64 map_start, u64 map_len)
 752{
 753        int ret, is_last;
 754        u32 mapping_end, cpos;
 755        unsigned int hole_size;
 756        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 757        u64 len_bytes, phys_bytes, virt_bytes;
 758        struct buffer_head *di_bh = NULL;
 759        struct ocfs2_extent_rec rec;
 760
 761        ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS);
 762        if (ret)
 763                return ret;
 764
 765        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 766        if (ret) {
 767                mlog_errno(ret);
 768                goto out;
 769        }
 770
 771        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 772
 773        /*
 774         * Handle inline-data and fast symlink separately.
 775         */
 776        if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
 777            ocfs2_inode_is_fast_symlink(inode)) {
 778                ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
 779                goto out_unlock;
 780        }
 781
 782        cpos = map_start >> osb->s_clustersize_bits;
 783        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
 784                                               map_start + map_len);
 785        mapping_end -= cpos;
 786        is_last = 0;
 787        while (cpos < mapping_end && !is_last) {
 788                u32 fe_flags;
 789
 790                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos,
 791                                                 &hole_size, &rec, &is_last);
 792                if (ret) {
 793                        mlog_errno(ret);
 794                        goto out;
 795                }
 796
 797                if (rec.e_blkno == 0ULL) {
 798                        cpos += hole_size;
 799                        continue;
 800                }
 801
 802                fe_flags = 0;
 803                if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
 804                        fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
 805                if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
 806                        fe_flags |= FIEMAP_EXTENT_SHARED;
 807                if (is_last)
 808                        fe_flags |= FIEMAP_EXTENT_LAST;
 809                len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
 810                phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
 811                virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
 812
 813                ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
 814                                              len_bytes, fe_flags);
 815                if (ret)
 816                        break;
 817
 818                cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters);
 819        }
 820
 821        if (ret > 0)
 822                ret = 0;
 823
 824out_unlock:
 825        brelse(di_bh);
 826
 827        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 828
 829        ocfs2_inode_unlock(inode, 0);
 830out:
 831
 832        return ret;
 833}
 834
 835int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int origin)
 836{
 837        struct inode *inode = file->f_mapping->host;
 838        int ret;
 839        unsigned int is_last = 0, is_data = 0;
 840        u16 cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
 841        u32 cpos, cend, clen, hole_size;
 842        u64 extoff, extlen;
 843        struct buffer_head *di_bh = NULL;
 844        struct ocfs2_extent_rec rec;
 845
 846        BUG_ON(origin != SEEK_DATA && origin != SEEK_HOLE);
 847
 848        ret = ocfs2_inode_lock(inode, &di_bh, 0);
 849        if (ret) {
 850                mlog_errno(ret);
 851                goto out;
 852        }
 853
 854        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 855
 856        if (*offset >= inode->i_size) {
 857                ret = -ENXIO;
 858                goto out_unlock;
 859        }
 860
 861        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
 862                if (origin == SEEK_HOLE)
 863                        *offset = inode->i_size;
 864                goto out_unlock;
 865        }
 866
 867        clen = 0;
 868        cpos = *offset >> cs_bits;
 869        cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size);
 870
 871        while (cpos < cend && !is_last) {
 872                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
 873                                                 &rec, &is_last);
 874                if (ret) {
 875                        mlog_errno(ret);
 876                        goto out_unlock;
 877                }
 878
 879                extoff = cpos;
 880                extoff <<= cs_bits;
 881
 882                if (rec.e_blkno == 0ULL) {
 883                        clen = hole_size;
 884                        is_data = 0;
 885                } else {
 886                        clen = le16_to_cpu(rec.e_leaf_clusters) -
 887                                (cpos - le32_to_cpu(rec.e_cpos));
 888                        is_data = (rec.e_flags & OCFS2_EXT_UNWRITTEN) ?  0 : 1;
 889                }
 890
 891                if ((!is_data && origin == SEEK_HOLE) ||
 892                    (is_data && origin == SEEK_DATA)) {
 893                        if (extoff > *offset)
 894                                *offset = extoff;
 895                        goto out_unlock;
 896                }
 897
 898                if (!is_last)
 899                        cpos += clen;
 900        }
 901
 902        if (origin == SEEK_HOLE) {
 903                extoff = cpos;
 904                extoff <<= cs_bits;
 905                extlen = clen;
 906                extlen <<=  cs_bits;
 907
 908                if ((extoff + extlen) > inode->i_size)
 909                        extlen = inode->i_size - extoff;
 910                extoff += extlen;
 911                if (extoff > *offset)
 912                        *offset = extoff;
 913                goto out_unlock;
 914        }
 915
 916        ret = -ENXIO;
 917
 918out_unlock:
 919
 920        brelse(di_bh);
 921
 922        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 923
 924        ocfs2_inode_unlock(inode, 0);
 925out:
 926        return ret;
 927}
 928
 929int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
 930                           struct buffer_head *bhs[], int flags,
 931                           int (*validate)(struct super_block *sb,
 932                                           struct buffer_head *bh))
 933{
 934        int rc = 0;
 935        u64 p_block, p_count;
 936        int i, count, done = 0;
 937
 938        trace_ocfs2_read_virt_blocks(
 939             inode, (unsigned long long)v_block, nr, bhs, flags,
 940             validate);
 941
 942        if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
 943            i_size_read(inode)) {
 944                BUG_ON(!(flags & OCFS2_BH_READAHEAD));
 945                goto out;
 946        }
 947
 948        while (done < nr) {
 949                down_read(&OCFS2_I(inode)->ip_alloc_sem);
 950                rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
 951                                                 &p_block, &p_count, NULL);
 952                up_read(&OCFS2_I(inode)->ip_alloc_sem);
 953                if (rc) {
 954                        mlog_errno(rc);
 955                        break;
 956                }
 957
 958                if (!p_block) {
 959                        rc = -EIO;
 960                        mlog(ML_ERROR,
 961                             "Inode #%llu contains a hole at offset %llu\n",
 962                             (unsigned long long)OCFS2_I(inode)->ip_blkno,
 963                             (unsigned long long)(v_block + done) <<
 964                             inode->i_sb->s_blocksize_bits);
 965                        break;
 966                }
 967
 968                count = nr - done;
 969                if (p_count < count)
 970                        count = p_count;
 971
 972                /*
 973                 * If the caller passed us bhs, they should have come
 974                 * from a previous readahead call to this function.  Thus,
 975                 * they should have the right b_blocknr.
 976                 */
 977                for (i = 0; i < count; i++) {
 978                        if (!bhs[done + i])
 979                                continue;
 980                        BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
 981                }
 982
 983                rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
 984                                       bhs + done, flags, validate);
 985                if (rc) {
 986                        mlog_errno(rc);
 987                        break;
 988                }
 989                done += count;
 990        }
 991
 992out:
 993        return rc;
 994}
 995
 996
 997
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.