linux/fs/xfs/xfs_ialloc.c
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_inum.h"
  24#include "xfs_trans.h"
  25#include "xfs_sb.h"
  26#include "xfs_ag.h"
  27#include "xfs_mount.h"
  28#include "xfs_bmap_btree.h"
  29#include "xfs_alloc_btree.h"
  30#include "xfs_ialloc_btree.h"
  31#include "xfs_dinode.h"
  32#include "xfs_inode.h"
  33#include "xfs_btree.h"
  34#include "xfs_ialloc.h"
  35#include "xfs_alloc.h"
  36#include "xfs_rtalloc.h"
  37#include "xfs_error.h"
  38#include "xfs_bmap.h"
  39
  40
  41/*
  42 * Allocation group level functions.
  43 */
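     /*
      * Return the alignment, in filesystem blocks, required for a newly
      * allocated inode cluster: the superblock inode alignment if it is
      * enabled and covers at least one cluster's worth of blocks,
      * otherwise 1 (no extra alignment).
      */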
  44static inline int
  45xfs_ialloc_cluster_alignment(
  46        xfs_alloc_arg_t *args)
  47{
  48        if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
  49            args->mp->m_sb.sb_inoalignmt >=
  50             XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
  51                return args->mp->m_sb.sb_inoalignmt;
  52        return 1;
  53}
  54
  55/*
  56 * Lookup a record by ino in the btree given by cur.
  57 */
  58int                                     /* error */
  59xfs_inobt_lookup(
  60        struct xfs_btree_cur    *cur,   /* btree cursor */
  61        xfs_agino_t             ino,    /* starting inode of chunk */
  62        xfs_lookup_t            dir,    /* <=, >=, == */
  63        int                     *stat)  /* success/failure */
  64{
  65        cur->bc_rec.i.ir_startino = ino;
  66        cur->bc_rec.i.ir_freecount = 0;
  67        cur->bc_rec.i.ir_free = 0;
  68        return xfs_btree_lookup(cur, dir, stat);
  69}
  70
  71/*
  72 * Update the record referred to by cur to the value given.
  73 * This either works (return 0) or gets an EFSCORRUPTED error.
  74 */
  75STATIC int                              /* error */
  76xfs_inobt_update(
  77        struct xfs_btree_cur    *cur,   /* btree cursor */
  78        xfs_inobt_rec_incore_t  *irec)  /* btree record */
  79{
  80        union xfs_btree_rec     rec;
  81
  82        rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
  83        rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
  84        rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
  85        return xfs_btree_update(cur, &rec);
  86}
  87
  88/*
  89 * Get the data from the pointed-to record.
  90 */
  91int                                     /* error */
  92xfs_inobt_get_rec(
  93        struct xfs_btree_cur    *cur,   /* btree cursor */
  94        xfs_inobt_rec_incore_t  *irec,  /* btree record */
  95        int                     *stat)  /* output: success/failure */
  96{
  97        union xfs_btree_rec     *rec;
  98        int                     error;
  99
 100        error = xfs_btree_get_rec(cur, &rec, stat);
 101        if (!error && *stat == 1) {
 102                irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
 103                irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
 104                irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
 105        }
 106        return error;
 107}
 108
 109/*
 110 * Verify that the number of free inodes in the AGI is correct.
 111 */
 112#ifdef DEBUG
 113STATIC int
 114xfs_check_agi_freecount(
 115        struct xfs_btree_cur    *cur,
 116        struct xfs_agi          *agi)
 117{
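             /* only cross-check single-level trees so the full walk stays cheap */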
 118        if (cur->bc_nlevels == 1) {
 119                xfs_inobt_rec_incore_t rec;
 120                int             freecount = 0;
 121                int             error;
 122                int             i;
 123
 124                error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
 125                if (error)
 126                        return error;
 127
 128                do {
 129                        error = xfs_inobt_get_rec(cur, &rec, &i);
 130                        if (error)
 131                                return error;
 132
 133                        if (i) {
 134                                freecount += rec.ir_freecount;
 135                                error = xfs_btree_increment(cur, 0, &i);
 136                                if (error)
 137                                        return error;
 138                        }
 139                } while (i == 1);
 140
 141                if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
 142                        ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
 143        }
 144        return 0;
 145}
 146#else
 147#define xfs_check_agi_freecount(cur, agi)       0
 148#endif
 149
 150/*
 151 * Initialise a new set of inodes.
 152 */
 153STATIC int
 154xfs_ialloc_inode_init(
 155        struct xfs_mount        *mp,
 156        struct xfs_trans        *tp,
 157        xfs_agnumber_t          agno,
 158        xfs_agblock_t           agbno,
 159        xfs_agblock_t           length,
 160        unsigned int            gen)
 161{
 162        struct xfs_buf          *fbuf;
 163        struct xfs_dinode       *free;
 164        int                     blks_per_cluster, nbufs, ninodes;
 165        int                     version;
 166        int                     i, j;
 167        xfs_daddr_t             d;
 168
 169        /*
 170         * Loop over the new block(s), filling in the inodes.
 171         * For small block sizes, manipulate the inodes in buffers
  172         * which are multiples of the block size.
 173         */
 174        if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
 175                blks_per_cluster = 1;
 176                nbufs = length;
 177                ninodes = mp->m_sb.sb_inopblock;
 178        } else {
 179                blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
 180                                   mp->m_sb.sb_blocksize;
 181                nbufs = length / blks_per_cluster;
 182                ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
 183        }
 184
 185        /*
 186         * Figure out what version number to use in the inodes we create.
 187         * If the superblock version has caught up to the one that supports
 188         * the new inode format, then use the new inode version.  Otherwise
 189         * use the old version so that old kernels will continue to be
 190         * able to use the file system.
 191         */
 192        if (xfs_sb_version_hasnlink(&mp->m_sb))
 193                version = 2;
 194        else
 195                version = 1;
 196
 197        for (j = 0; j < nbufs; j++) {
 198                /*
 199                 * Get the block.
 200                 */
 201                d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
 202                fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 203                                         mp->m_bsize * blks_per_cluster, 0);
 204                if (!fbuf)
 205                        return ENOMEM;
 206                /*
 207                 * Initialize all inodes in this buffer and then log them.
 208                 *
 209                 * XXX: It would be much better if we had just one transaction
 210                 *      to log a whole cluster of inodes instead of all the
 211                 *      individual transactions causing a lot of log traffic.
 212                 */
 213                xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
 214                for (i = 0; i < ninodes; i++) {
 215                        int     ioffset = i << mp->m_sb.sb_inodelog;
 216                        uint    isize = sizeof(struct xfs_dinode);
 217
 218                        free = xfs_make_iptr(mp, fbuf, i);
 219                        free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 220                        free->di_version = version;
 221                        free->di_gen = cpu_to_be32(gen);
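                             /* not yet on any unlinked list */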
 222                        free->di_next_unlinked = cpu_to_be32(NULLAGINO);
 223                        xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
 224                }
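                     /* flag this as an inode allocation buffer for log recovery */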
 225                xfs_trans_inode_alloc_buf(tp, fbuf);
 226        }
 227        return 0;
 228}
 229
 230/*
 231 * Allocate new inodes in the allocation group specified by agbp.
 232 * Return 0 for success, else error code.
 233 */
 234STATIC int                              /* error code or 0 */
 235xfs_ialloc_ag_alloc(
 236        xfs_trans_t     *tp,            /* transaction pointer */
 237        xfs_buf_t       *agbp,          /* alloc group buffer */
 238        int             *alloc)
 239{
 240        xfs_agi_t       *agi;           /* allocation group header */
 241        xfs_alloc_arg_t args;           /* allocation argument structure */
 242        xfs_btree_cur_t *cur;           /* inode btree cursor */
 243        xfs_agnumber_t  agno;
 244        int             error;
 245        int             i;
 246        xfs_agino_t     newino;         /* new first inode's number */
 247        xfs_agino_t     newlen;         /* new number of inodes */
 248        xfs_agino_t     thisino;        /* current inode number, for loop */
 249        int             isaligned = 0;  /* inode allocation at stripe unit */
 250                                        /* boundary */
 251        struct xfs_perag *pag;
 252
 253        memset(&args, 0, sizeof(args));
 254        args.tp = tp;
 255        args.mp = tp->t_mountp;
 256
 257        /*
 258         * Locking will ensure that we don't have two callers in here
 259         * at one time.
 260         */
 261        newlen = XFS_IALLOC_INODES(args.mp);
 262        if (args.mp->m_maxicount &&
 263            args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
 264                return XFS_ERROR(ENOSPC);
 265        args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
 266        /*
 267         * First try to allocate inodes contiguous with the last-allocated
 268         * chunk of inodes.  If the filesystem is striped, this will fill
 269         * an entire stripe unit with inodes.
 270         */
 271        agi = XFS_BUF_TO_AGI(agbp);
 272        newino = be32_to_cpu(agi->agi_newino);
 273        agno = be32_to_cpu(agi->agi_seqno);
 274        args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
 275                        XFS_IALLOC_BLOCKS(args.mp);
 276        if (likely(newino != NULLAGINO &&
 277                  (args.agbno < be32_to_cpu(agi->agi_length)))) {
 278                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
 279                args.type = XFS_ALLOCTYPE_THIS_BNO;
 280                args.mod = args.total = args.wasdel = args.isfl =
 281                        args.userdata = args.minalignslop = 0;
 282                args.prod = 1;
 283
 284                /*
 285                 * We need to take into account alignment here to ensure that
 286                 * we don't modify the free list if we fail to have an exact
  287                 * block. If we don't have an exact match, and every other
  288                 * allocation attempt fails, we'll end up cancelling
 289                 * a dirty transaction and shutting down.
 290                 *
  291                 * For an exact allocation, alignment must be 1.  However,
  292                 * we need to take cluster alignment into account when
 293                 * fixing up the freelist. Use the minalignslop field to
 294                 * indicate that extra blocks might be required for alignment,
 295                 * but not to use them in the actual exact allocation.
 296                 */
 297                args.alignment = 1;
 298                args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
 299
 300                /* Allow space for the inode btree to split. */
 301                args.minleft = args.mp->m_in_maxlevels - 1;
 302                if ((error = xfs_alloc_vextent(&args)))
 303                        return error;
 304        } else
 305                args.fsbno = NULLFSBLOCK;
 306
 307        if (unlikely(args.fsbno == NULLFSBLOCK)) {
 308                /*
 309                 * Set the alignment for the allocation.
 310                 * If stripe alignment is turned on then align at stripe unit
 311                 * boundary.
 312                 * If the cluster size is smaller than a filesystem block
 313                 * then we're doing I/O for inodes in filesystem block size
 314                 * pieces, so don't need alignment anyway.
 315                 */
 316                isaligned = 0;
 317                if (args.mp->m_sinoalign) {
 318                        ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
 319                        args.alignment = args.mp->m_dalign;
 320                        isaligned = 1;
 321                } else
 322                        args.alignment = xfs_ialloc_cluster_alignment(&args);
 323                /*
 324                 * Need to figure out where to allocate the inode blocks.
 325                 * Ideally they should be spaced out through the a.g.
 326                 * For now, just allocate blocks up front.
 327                 */
 328                args.agbno = be32_to_cpu(agi->agi_root);
 329                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
 330                /*
 331                 * Allocate a fixed-size extent of inodes.
 332                 */
 333                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 334                args.mod = args.total = args.wasdel = args.isfl =
 335                        args.userdata = args.minalignslop = 0;
 336                args.prod = 1;
 337                /*
 338                 * Allow space for the inode btree to split.
 339                 */
 340                args.minleft = args.mp->m_in_maxlevels - 1;
 341                if ((error = xfs_alloc_vextent(&args)))
 342                        return error;
 343        }
 344
 345        /*
 346         * If stripe alignment is turned on, then try again with cluster
 347         * alignment.
 348         */
 349        if (isaligned && args.fsbno == NULLFSBLOCK) {
 350                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 351                args.agbno = be32_to_cpu(agi->agi_root);
 352                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
 353                args.alignment = xfs_ialloc_cluster_alignment(&args);
 354                if ((error = xfs_alloc_vextent(&args)))
 355                        return error;
 356        }
 357
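             /*
              * If every attempt failed, there is no space for a new inode
              * chunk; tell the caller nothing was allocated.
              */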
 358        if (args.fsbno == NULLFSBLOCK) {
 359                *alloc = 0;
 360                return 0;
 361        }
 362        ASSERT(args.len == args.minlen);
 363
 364        /*
 365         * Stamp and write the inode buffers.
 366         *
 367         * Seed the new inode cluster with a random generation number. This
 368         * prevents short-term reuse of generation numbers if a chunk is
 369         * freed and then immediately reallocated. We use random numbers
 370         * rather than a linear progression to prevent the next generation
 371         * number from being easily guessable.
 372         */
 373        error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
 374                        args.len, random32());
 375
 376        if (error)
 377                return error;
 378        /*
 379         * Convert the results.
 380         */
 381        newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
 382        be32_add_cpu(&agi->agi_count, newlen);
 383        be32_add_cpu(&agi->agi_freecount, newlen);
 384        pag = xfs_perag_get(args.mp, agno);
 385        pag->pagi_freecount += newlen;
 386        xfs_perag_put(pag);
 387        agi->agi_newino = cpu_to_be32(newino);
 388
 389        /*
 390         * Insert records describing the new inode chunk into the btree.
 391         */
 392        cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
 393        for (thisino = newino;
 394             thisino < newino + newlen;
 395             thisino += XFS_INODES_PER_CHUNK) {
 396                cur->bc_rec.i.ir_startino = thisino;
 397                cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
 398                cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
 399                error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
 400                if (error) {
 401                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 402                        return error;
 403                }
 404                ASSERT(i == 0);
 405                error = xfs_btree_insert(cur, &i);
 406                if (error) {
 407                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 408                        return error;
 409                }
 410                ASSERT(i == 1);
 411        }
 412        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 413        /*
 414         * Log allocation group header fields
 415         */
 416        xfs_ialloc_log_agi(tp, agbp,
 417                XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
 418        /*
 419         * Modify/log superblock values for inode count and inode free count.
 420         */
 421        xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
 422        xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
 423        *alloc = 1;
 424        return 0;
 425}
 426
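     /*
      * Select the next allocation group to start inode allocation in,
      * advancing the per-mount rotor under m_agirotor_lock and wrapping
      * at m_maxagi.
      */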
 427STATIC xfs_agnumber_t
 428xfs_ialloc_next_ag(
 429        xfs_mount_t     *mp)
 430{
 431        xfs_agnumber_t  agno;
 432
 433        spin_lock(&mp->m_agirotor_lock);
 434        agno = mp->m_agirotor;
 435        if (++mp->m_agirotor >= mp->m_maxagi)
 436                mp->m_agirotor = 0;
 437        spin_unlock(&mp->m_agirotor_lock);
 438
 439        return agno;
 440}
 441
 442/*
 443 * Select an allocation group to look for a free inode in, based on the parent
  444 * inode and the mode.  Return the allocation group number or NULLAGNUMBER.
 445 */
 446STATIC xfs_agnumber_t
 447xfs_ialloc_ag_select(
 448        xfs_trans_t     *tp,            /* transaction pointer */
 449        xfs_ino_t       parent,         /* parent directory inode number */
 450        umode_t         mode,           /* bits set to indicate file type */
 451        int             okalloc)        /* ok to allocate more space */
 452{
 453        xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
 454        xfs_agnumber_t  agno;           /* current ag number */
 455        int             flags;          /* alloc buffer locking flags */
 456        xfs_extlen_t    ineed;          /* blocks needed for inode allocation */
 457        xfs_extlen_t    longest = 0;    /* longest extent available */
 458        xfs_mount_t     *mp;            /* mount point structure */
 459        int             needspace;      /* file mode implies space allocated */
 460        xfs_perag_t     *pag;           /* per allocation group data */
 461        xfs_agnumber_t  pagno;          /* parent (starting) ag number */
 462        int             error;
 463
 464        /*
 465         * Files of these types need at least one block if length > 0
 466         * (and they won't fit in the inode, but that's hard to figure out).
 467         */
 468        needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
 469        mp = tp->t_mountp;
 470        agcount = mp->m_maxagi;
 471        if (S_ISDIR(mode))
 472                pagno = xfs_ialloc_next_ag(mp);
 473        else {
 474                pagno = XFS_INO_TO_AGNO(mp, parent);
 475                if (pagno >= agcount)
 476                        pagno = 0;
 477        }
 478
 479        ASSERT(pagno < agcount);
 480
 481        /*
 482         * Loop through allocation groups, looking for one with a little
 483         * free space in it.  Note we don't look for free inodes, exactly.
  484         * Instead, we also account for the blocks that would need to
  485         * be allocated for new inodes if none are currently free in
  486         * the allocation group.
 487         */
 488        agno = pagno;
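             /*
              * The first pass only trylocks the AGF buffers; if no AG is
              * found, a second pass with flags cleared blocks on the locks.
              */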
 489        flags = XFS_ALLOC_FLAG_TRYLOCK;
 490        for (;;) {
 491                pag = xfs_perag_get(mp, agno);
 492                if (!pag->pagi_inodeok) {
 493                        xfs_ialloc_next_ag(mp);
 494                        goto nextag;
 495                }
 496
 497                if (!pag->pagi_init) {
 498                        error = xfs_ialloc_pagi_init(mp, tp, agno);
 499                        if (error)
 500                                goto nextag;
 501                }
 502
 503                if (pag->pagi_freecount) {
 504                        xfs_perag_put(pag);
 505                        return agno;
 506                }
 507
 508                if (!okalloc)
 509                        goto nextag;
 510
 511                if (!pag->pagf_init) {
 512                        error = xfs_alloc_pagf_init(mp, tp, agno, flags);
 513                        if (error)
 514                                goto nextag;
 515                }
 516
 517                /*
 518                 * Is there enough free space for the file plus a block of
  519                 * inodes (if we need to allocate some)?
 520                 */
 521                ineed = XFS_IALLOC_BLOCKS(mp);
 522                longest = pag->pagf_longest;
 523                if (!longest)
 524                        longest = pag->pagf_flcount > 0;
 525
 526                if (pag->pagf_freeblks >= needspace + ineed &&
 527                    longest >= ineed) {
 528                        xfs_perag_put(pag);
 529                        return agno;
 530                }
 531nextag:
 532                xfs_perag_put(pag);
 533                /*
 534                 * No point in iterating over the rest, if we're shutting
 535                 * down.
 536                 */
 537                if (XFS_FORCED_SHUTDOWN(mp))
 538                        return NULLAGNUMBER;
 539                agno++;
 540                if (agno >= agcount)
 541                        agno = 0;
 542                if (agno == pagno) {
 543                        if (flags == 0)
 544                                return NULLAGNUMBER;
 545                        flags = 0;
 546                }
 547        }
 548}
 549
 550/*
 551 * Try to retrieve the next record to the left/right from the current one.
 552 */
 553STATIC int
 554xfs_ialloc_next_rec(
 555        struct xfs_btree_cur    *cur,
 556        xfs_inobt_rec_incore_t  *rec,
 557        int                     *done,
 558        int                     left)
 559{
 560        int                     error;
 561        int                     i;
 562
 563        if (left)
 564                error = xfs_btree_decrement(cur, 0, &i);
 565        else
 566                error = xfs_btree_increment(cur, 0, &i);
 567
 568        if (error)
 569                return error;
 570        *done = !i;
 571        if (i) {
 572                error = xfs_inobt_get_rec(cur, rec, &i);
 573                if (error)
 574                        return error;
 575                XFS_WANT_CORRUPTED_RETURN(i == 1);
 576        }
 577
 578        return 0;
 579}
 580
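     /*
      * Look up the inode chunk record that starts at @agino.  *done is set
      * if no such record exists.
      */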
 581STATIC int
 582xfs_ialloc_get_rec(
 583        struct xfs_btree_cur    *cur,
 584        xfs_agino_t             agino,
 585        xfs_inobt_rec_incore_t  *rec,
 586        int                     *done,
 587        int                     left)
 588{
 589        int                     error;
 590        int                     i;
 591
 592        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
 593        if (error)
 594                return error;
 595        *done = !i;
 596        if (i) {
 597                error = xfs_inobt_get_rec(cur, rec, &i);
 598                if (error)
 599                        return error;
 600                XFS_WANT_CORRUPTED_RETURN(i == 1);
 601        }
 602
 603        return 0;
 604}
 605
 606/*
 607 * Allocate an inode.
 608 *
 609 * The caller selected an AG for us, and made sure that free inodes are
 610 * available.
 611 */
 612STATIC int
 613xfs_dialloc_ag(
 614        struct xfs_trans        *tp,
 615        struct xfs_buf          *agbp,
 616        xfs_ino_t               parent,
 617        xfs_ino_t               *inop)
 618{
 619        struct xfs_mount        *mp = tp->t_mountp;
 620        struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
 621        xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
 622        xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
 623        xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
 624        struct xfs_perag        *pag;
 625        struct xfs_btree_cur    *cur, *tcur;
 626        struct xfs_inobt_rec_incore rec, trec;
 627        xfs_ino_t               ino;
 628        int                     error;
 629        int                     offset;
 630        int                     i, j;
 631
 632        pag = xfs_perag_get(mp, agno);
 633
 634        ASSERT(pag->pagi_init);
 635        ASSERT(pag->pagi_inodeok);
 636        ASSERT(pag->pagi_freecount > 0);
 637
 638 restart_pagno:
 639        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
 640        /*
 641         * If pagino is 0 (this is the root inode allocation) use newino.
 642         * This must work because we've just allocated some.
 643         */
 644        if (!pagino)
 645                pagino = be32_to_cpu(agi->agi_newino);
 646
 647        error = xfs_check_agi_freecount(cur, agi);
 648        if (error)
 649                goto error0;
 650
 651        /*
 652         * If in the same AG as the parent, try to get near the parent.
 653         */
 654        if (pagno == agno) {
 655                int             doneleft;       /* done, to the left */
 656                int             doneright;      /* done, to the right */
 657                int             searchdistance = 10;
 658
 659                error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
 660                if (error)
 661                        goto error0;
 662                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 663
 664                error = xfs_inobt_get_rec(cur, &rec, &j);
 665                if (error)
 666                        goto error0;
  667                XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
 668
 669                if (rec.ir_freecount > 0) {
 670                        /*
 671                         * Found a free inode in the same chunk
 672                         * as the parent, done.
 673                         */
 674                        goto alloc_inode;
 675                }
 676
 677
 678                /*
 679                 * In the same AG as parent, but parent's chunk is full.
 680                 */
 681
 682                /* duplicate the cursor, search left & right simultaneously */
 683                error = xfs_btree_dup_cursor(cur, &tcur);
 684                if (error)
 685                        goto error0;
 686
 687                /*
 688                 * Skip to last blocks looked up if same parent inode.
 689                 */
 690                if (pagino != NULLAGINO &&
 691                    pag->pagl_pagino == pagino &&
 692                    pag->pagl_leftrec != NULLAGINO &&
 693                    pag->pagl_rightrec != NULLAGINO) {
 694                        error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
 695                                                   &trec, &doneleft, 1);
 696                        if (error)
 697                                goto error1;
 698
 699                        error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
 700                                                   &rec, &doneright, 0);
 701                        if (error)
 702                                goto error1;
 703                } else {
 704                        /* search left with tcur, back up 1 record */
 705                        error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
 706                        if (error)
 707                                goto error1;
 708
 709                        /* search right with cur, go forward 1 record. */
 710                        error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
 711                        if (error)
 712                                goto error1;
 713                }
 714
 715                /*
 716                 * Loop until we find an inode chunk with a free inode.
 717                 */
 718                while (!doneleft || !doneright) {
 719                        int     useleft;  /* using left inode chunk this time */
 720
 721                        if (!--searchdistance) {
 722                                /*
 723                                 * Not in range - save last search
 724                                 * location and allocate a new inode
 725                                 */
 726                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 727                                pag->pagl_leftrec = trec.ir_startino;
 728                                pag->pagl_rightrec = rec.ir_startino;
 729                                pag->pagl_pagino = pagino;
 730                                goto newino;
 731                        }
 732
 733                        /* figure out the closer block if both are valid. */
 734                        if (!doneleft && !doneright) {
 735                                useleft = pagino -
 736                                 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
 737                                  rec.ir_startino - pagino;
 738                        } else {
 739                                useleft = !doneleft;
 740                        }
 741
 742                        /* free inodes to the left? */
 743                        if (useleft && trec.ir_freecount) {
 744                                rec = trec;
 745                                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 746                                cur = tcur;
 747
 748                                pag->pagl_leftrec = trec.ir_startino;
 749                                pag->pagl_rightrec = rec.ir_startino;
 750                                pag->pagl_pagino = pagino;
 751                                goto alloc_inode;
 752                        }
 753
 754                        /* free inodes to the right? */
 755                        if (!useleft && rec.ir_freecount) {
 756                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 757
 758                                pag->pagl_leftrec = trec.ir_startino;
 759                                pag->pagl_rightrec = rec.ir_startino;
 760                                pag->pagl_pagino = pagino;
 761                                goto alloc_inode;
 762                        }
 763
 764                        /* get next record to check */
 765                        if (useleft) {
 766                                error = xfs_ialloc_next_rec(tcur, &trec,
 767                                                                 &doneleft, 1);
 768                        } else {
 769                                error = xfs_ialloc_next_rec(cur, &rec,
 770                                                                 &doneright, 0);
 771                        }
 772                        if (error)
 773                                goto error1;
 774                }
 775
 776                /*
  777                 * We've reached the end of the btree.  Because
  778                 * we are only searching a small chunk of the
  779                 * btree each search, there are obviously free
  780                 * inodes closer to the parent inode than we
  781                 * are now.  Restart the search.
 782                 */
 783                pag->pagl_pagino = NULLAGINO;
 784                pag->pagl_leftrec = NULLAGINO;
 785                pag->pagl_rightrec = NULLAGINO;
 786                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 787                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 788                goto restart_pagno;
 789        }
 790
 791        /*
 792         * In a different AG from the parent.
 793         * See if the most recently allocated block has any free.
 794         */
 795newino:
 796        if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
 797                error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
 798                                         XFS_LOOKUP_EQ, &i);
 799                if (error)
 800                        goto error0;
 801
 802                if (i == 1) {
 803                        error = xfs_inobt_get_rec(cur, &rec, &j);
 804                        if (error)
 805                                goto error0;
 806
 807                        if (j == 1 && rec.ir_freecount > 0) {
 808                                /*
 809                                 * The last chunk allocated in the group
 810                                 * still has a free inode.
 811                                 */
 812                                goto alloc_inode;
 813                        }
 814                }
 815        }
 816
 817        /*
 818         * None left in the last group, search the whole AG
 819         */
 820        error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
 821        if (error)
 822                goto error0;
 823        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 824
 825        for (;;) {
 826                error = xfs_inobt_get_rec(cur, &rec, &i);
 827                if (error)
 828                        goto error0;
 829                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 830                if (rec.ir_freecount > 0)
 831                        break;
 832                error = xfs_btree_increment(cur, 0, &i);
 833                if (error)
 834                        goto error0;
 835                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 836        }
 837
 838alloc_inode:
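             /* grab the lowest-numbered free inode in the chunk */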
 839        offset = xfs_lowbit64(rec.ir_free);
 840        ASSERT(offset >= 0);
 841        ASSERT(offset < XFS_INODES_PER_CHUNK);
 842        ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
 843                                   XFS_INODES_PER_CHUNK) == 0);
 844        ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
 845        rec.ir_free &= ~XFS_INOBT_MASK(offset);
 846        rec.ir_freecount--;
 847        error = xfs_inobt_update(cur, &rec);
 848        if (error)
 849                goto error0;
 850        be32_add_cpu(&agi->agi_freecount, -1);
 851        xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
 852        pag->pagi_freecount--;
 853
 854        error = xfs_check_agi_freecount(cur, agi);
 855        if (error)
 856                goto error0;
 857
 858        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 859        xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
 860        xfs_perag_put(pag);
 861        *inop = ino;
 862        return 0;
 863error1:
 864        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
 865error0:
 866        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 867        xfs_perag_put(pag);
 868        return error;
 869}
 870
 871/*
 872 * Allocate an inode on disk.
 873 *
 874 * Mode is used to tell whether the new inode will need space, and whether it
 875 * is a directory.
 876 *
 877 * This function is designed to be called twice if it has to do an allocation
 878 * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
  879 * If an inode is available without having to perform an allocation, an inode
 880 * number is returned.  In this case, *IO_agbp would be NULL.  If an allocation
  881 * needs to be done, xfs_dialloc will return the current AGI buffer in
 882 * *IO_agbp.  The caller should then commit the current transaction, allocate a
 883 * new transaction, and call xfs_dialloc() again, passing in the previous value
 884 * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
 885 * buffer is locked across the two calls, the second call is guaranteed to have
 886 * a free inode available.
 887 *
 888 * Once we successfully pick an inode its number is returned and the on-disk
 889 * data structures are updated.  The inode itself is not read in, since doing so
 890 * would break ordering constraints with xfs_reclaim.
 891 */
 892int
 893xfs_dialloc(
 894        struct xfs_trans        *tp,
 895        xfs_ino_t               parent,
 896        umode_t                 mode,
 897        int                     okalloc,
 898        struct xfs_buf          **IO_agbp,
 899        xfs_ino_t               *inop)
 900{
 901        struct xfs_mount        *mp = tp->t_mountp;
 902        struct xfs_buf          *agbp;
 903        xfs_agnumber_t          agno;
 904        int                     error;
 905        int                     ialloced;
 906        int                     noroom = 0;
 907        xfs_agnumber_t          start_agno;
 908        struct xfs_perag        *pag;
 909
 910        if (*IO_agbp) {
 911                /*
 912                 * If the caller passes in a pointer to the AGI buffer,
 913                 * continue where we left off before.  In this case, we
 914                 * know that the allocation group has free inodes.
 915                 */
 916                agbp = *IO_agbp;
 917                goto out_alloc;
 918        }
 919
 920        /*
 921         * We do not have an agbp, so select an initial allocation
 922         * group for inode allocation.
 923         */
 924        start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
 925        if (start_agno == NULLAGNUMBER) {
 926                *inop = NULLFSINO;
 927                return 0;
 928        }
 929
 930        /*
 931         * If we have already hit the ceiling of inode blocks then clear
 932         * okalloc so we scan all available agi structures for a free
 933         * inode.
 934         */
 935        if (mp->m_maxicount &&
 936            mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
 937                noroom = 1;
 938                okalloc = 0;
 939        }
 940
 941        /*
 942         * Loop until we find an allocation group that either has free inodes
 943         * or in which we can allocate some inodes.  Iterate through the
 944         * allocation groups upward, wrapping at the end.
 945         */
 946        agno = start_agno;
 947        for (;;) {
 948                pag = xfs_perag_get(mp, agno);
 949                if (!pag->pagi_inodeok) {
 950                        xfs_ialloc_next_ag(mp);
 951                        goto nextag;
 952                }
 953
 954                if (!pag->pagi_init) {
 955                        error = xfs_ialloc_pagi_init(mp, tp, agno);
 956                        if (error)
 957                                goto out_error;
 958                }
 959
 960                /*
 961                 * Do a first racy fast path check if this AG is usable.
 962                 */
 963                if (!pag->pagi_freecount && !okalloc)
 964                        goto nextag;
 965
 966                /*
 967                 * Then read in the AGI buffer and recheck with the AGI buffer
 968                 * lock held.
 969                 */
 970                error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 971                if (error)
 972                        goto out_error;
 973
 974                if (pag->pagi_freecount) {
 975                        xfs_perag_put(pag);
 976                        goto out_alloc;
 977                }
 978
 979                if (!okalloc)
 980                        goto nextag_relse_buffer;
 981
 982
 983                error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
 984                if (error) {
 985                        xfs_trans_brelse(tp, agbp);
 986
 987                        if (error != ENOSPC)
 988                                goto out_error;
 989
 990                        xfs_perag_put(pag);
 991                        *inop = NULLFSINO;
 992                        return 0;
 993                }
 994
 995                if (ialloced) {
 996                        /*
 997                         * We successfully allocated some inodes, return
 998                         * the current context to the caller so that it
 999                         * can commit the current transaction and call
1000                         * us again where we left off.
1001                         */
1002                        ASSERT(pag->pagi_freecount > 0);
1003                        xfs_perag_put(pag);
1004
1005                        *IO_agbp = agbp;
1006                        *inop = NULLFSINO;
1007                        return 0;
1008                }
1009
1010nextag_relse_buffer:
1011                xfs_trans_brelse(tp, agbp);
1012nextag:
1013                xfs_perag_put(pag);
1014                if (++agno == mp->m_sb.sb_agcount)
1015                        agno = 0;
1016                if (agno == start_agno) {
1017                        *inop = NULLFSINO;
1018                        return noroom ? ENOSPC : 0;
1019                }
1020        }
1021
1022out_alloc:
1023        *IO_agbp = NULL;
1024        return xfs_dialloc_ag(tp, agbp, parent, inop);
1025out_error:
1026        xfs_perag_put(pag);
1027        return XFS_ERROR(error);
1028}
1029
1030/*
 1031 * Free disk inode.  Carefully avoids touching the incore inode; all
 1032 * incore manipulations are the caller's responsibility.
1033 * The on-disk inode is not changed by this operation, only the
1034 * btree (free inode mask) is changed.
1035 */
1036int
1037xfs_difree(
1038        xfs_trans_t     *tp,            /* transaction pointer */
1039        xfs_ino_t       inode,          /* inode to be freed */
1040        xfs_bmap_free_t *flist,         /* extents to free */
1041        int             *delete,        /* set if inode cluster was deleted */
1042        xfs_ino_t       *first_ino)     /* first inode in deleted cluster */
1043{
1044        /* REFERENCED */
1045        xfs_agblock_t   agbno;  /* block number containing inode */
1046        xfs_buf_t       *agbp;  /* buffer containing allocation group header */
1047        xfs_agino_t     agino;  /* inode number relative to allocation group */
1048        xfs_agnumber_t  agno;   /* allocation group number */
1049        xfs_agi_t       *agi;   /* allocation group header */
1050        xfs_btree_cur_t *cur;   /* inode btree cursor */
1051        int             error;  /* error return value */
1052        int             i;      /* result code */
1053        int             ilen;   /* inodes in an inode cluster */
1054        xfs_mount_t     *mp;    /* mount structure for filesystem */
1055        int             off;    /* offset of inode in inode chunk */
1056        xfs_inobt_rec_incore_t rec;     /* btree record */
1057        struct xfs_perag *pag;
1058
1059        mp = tp->t_mountp;
1060
1061        /*
1062         * Break up inode number into its components.
1063         */
1064        agno = XFS_INO_TO_AGNO(mp, inode);
1065        if (agno >= mp->m_sb.sb_agcount)  {
1066                xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1067                        __func__, agno, mp->m_sb.sb_agcount);
1068                ASSERT(0);
1069                return XFS_ERROR(EINVAL);
1070        }
1071        agino = XFS_INO_TO_AGINO(mp, inode);
1072        if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
1073                xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1074                        __func__, (unsigned long long)inode,
1075                        (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1076                ASSERT(0);
1077                return XFS_ERROR(EINVAL);
1078        }
1079        agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1080        if (agbno >= mp->m_sb.sb_agblocks)  {
1081                xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1082                        __func__, agbno, mp->m_sb.sb_agblocks);
1083                ASSERT(0);
1084                return XFS_ERROR(EINVAL);
1085        }
1086        /*
1087         * Get the allocation group header.
1088         */
1089        error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1090        if (error) {
1091                xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1092                        __func__, error);
1093                return error;
1094        }
1095        agi = XFS_BUF_TO_AGI(agbp);
1096        ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1097        ASSERT(agbno < be32_to_cpu(agi->agi_length));
1098        /*
1099         * Initialize the cursor.
1100         */
1101        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1102
1103        error = xfs_check_agi_freecount(cur, agi);
1104        if (error)
1105                goto error0;
1106
1107        /*
1108         * Look for the entry describing this inode.
1109         */
1110        if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
1111                xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
1112                        __func__, error);
1113                goto error0;
1114        }
1115        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1116        error = xfs_inobt_get_rec(cur, &rec, &i);
1117        if (error) {
1118                xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
1119                        __func__, error);
1120                goto error0;
1121        }
1122        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1123        /*
1124         * Get the offset in the inode chunk.
1125         */
1126        off = agino - rec.ir_startino;
1127        ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
1128        ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
1129        /*
1130         * Mark the inode free & increment the count.
1131         */
1132        rec.ir_free |= XFS_INOBT_MASK(off);
1133        rec.ir_freecount++;
1134
1135        /*
1136         * When an inode cluster is free, it becomes eligible for removal
1137         */
1138        if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
1139            (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
1140
1141                *delete = 1;
1142                *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
1143
1144                /*
1145                 * Remove the inode cluster from the AGI B+Tree, adjust the
1146                 * AGI and Superblock inode counts, and mark the disk space
1147                 * to be freed when the transaction is committed.
1148                 */
1149                ilen = XFS_IALLOC_INODES(mp);
1150                be32_add_cpu(&agi->agi_count, -ilen);
1151                be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1152                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1153                pag = xfs_perag_get(mp, agno);
1154                pag->pagi_freecount -= ilen - 1;
1155                xfs_perag_put(pag);
1156                xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1157                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1158
1159                if ((error = xfs_btree_delete(cur, &i))) {
1160                        xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
1161                                __func__, error);
1162                        goto error0;
1163                }
1164
1165                xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
1166                                agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
1167                                XFS_IALLOC_BLOCKS(mp), flist, mp);
1168        } else {
1169                *delete = 0;
1170
1171                error = xfs_inobt_update(cur, &rec);
1172                if (error) {
1173                        xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
1174                                __func__, error);
1175                        goto error0;
1176                }
1177
1178                /* 
1179                 * Change the inode free counts and log the ag/sb changes.
1180                 */
1181                be32_add_cpu(&agi->agi_freecount, 1);
1182                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1183                pag = xfs_perag_get(mp, agno);
1184                pag->pagi_freecount++;
1185                xfs_perag_put(pag);
1186                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1187        }
1188
1189        error = xfs_check_agi_freecount(cur, agi);
1190        if (error)
1191                goto error0;
1192
1193        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1194        return 0;
1195
1196error0:
1197        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1198        return error;
1199}
1200
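     /*
      * Look up the inode chunk containing @agino and return the chunk's
      * first block in *chunk_agbno and the inode's block offset from that
      * chunk in *offset_agbno.  For untrusted inode numbers, also check
      * that the inode is actually allocated.
      */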
1201STATIC int
1202xfs_imap_lookup(
1203        struct xfs_mount        *mp,
1204        struct xfs_trans        *tp,
1205        xfs_agnumber_t          agno,
1206        xfs_agino_t             agino,
1207        xfs_agblock_t           agbno,
1208        xfs_agblock_t           *chunk_agbno,
1209        xfs_agblock_t           *offset_agbno,
1210        int                     flags)
1211{
1212        struct xfs_inobt_rec_incore rec;
1213        struct xfs_btree_cur    *cur;
1214        struct xfs_buf          *agbp;
1215        int                     error;
1216        int                     i;
1217
1218        error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1219        if (error) {
1220                xfs_alert(mp,
1221                        "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
1222                        __func__, error, agno);
1223                return error;
1224        }
1225
1226        /*
1227         * Lookup the inode record for the given agino. If the record cannot be
1228         * found, then it's an invalid inode number and we should abort. Once
1229         * we have a record, we need to ensure it contains the inode number
1230         * we are looking up.
1231         */
1232        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1233        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1234        if (!error) {
1235                if (i)
1236                        error = xfs_inobt_get_rec(cur, &rec, &i);
1237                if (!error && i == 0)
1238                        error = EINVAL;
1239        }
1240
1241        xfs_trans_brelse(tp, agbp);
1242        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1243        if (error)
1244                return error;
1245
1246        /* check that the returned record contains the required inode */
1247        if (rec.ir_startino > agino ||
1248            rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
1249                return EINVAL;
1250
1251        /* for untrusted inodes check it is allocated first */
1252        if ((flags & XFS_IGET_UNTRUSTED) &&
1253            (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1254                return EINVAL;
1255
1256        *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1257        *offset_agbno = agbno - *chunk_agbno;
1258        return 0;
1259}
1260
1261/*
1262 * Return the location of the inode in imap, for mapping it into a buffer.
1263 */
1264int
1265xfs_imap(
1266        xfs_mount_t      *mp,   /* file system mount structure */
1267        xfs_trans_t      *tp,   /* transaction pointer */
1268        xfs_ino_t       ino,    /* inode to locate */
1269        struct xfs_imap *imap,  /* location map structure */
1270        uint            flags)  /* flags for inode btree lookup */
1271{
1272        xfs_agblock_t   agbno;  /* block number of inode in the alloc group */
1273        xfs_agino_t     agino;  /* inode number within alloc group */
1274        xfs_agnumber_t  agno;   /* allocation group number */
1275        int             blks_per_cluster; /* num blocks per inode cluster */
1276        xfs_agblock_t   chunk_agbno;    /* first block in inode chunk */
1277        xfs_agblock_t   cluster_agbno;  /* first block in inode cluster */
1278        int             error;  /* error code */
1279        int             offset; /* index of inode in its buffer */
1280        int             offset_agbno;   /* blks from chunk start to inode */
1281
1282        ASSERT(ino != NULLFSINO);
1283
1284        /*
1285         * Split up the inode number into its parts.
1286         */
1287        agno = XFS_INO_TO_AGNO(mp, ino);
1288        agino = XFS_INO_TO_AGINO(mp, ino);
1289        agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1290        if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1291            ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1292#ifdef DEBUG
1293                /*
1294                 * Don't output diagnostic information for untrusted inodes
1295                 * as they can be invalid without implying corruption.
1296                 */
1297                if (flags & XFS_IGET_UNTRUSTED)
1298                        return XFS_ERROR(EINVAL);
1299                if (agno >= mp->m_sb.sb_agcount) {
1300                        xfs_alert(mp,
1301                                "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
1302                                __func__, agno, mp->m_sb.sb_agcount);
1303                }
1304                if (agbno >= mp->m_sb.sb_agblocks) {
1305                        xfs_alert(mp,
1306                "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
1307                                __func__, (unsigned long long)agbno,
1308                                (unsigned long)mp->m_sb.sb_agblocks);
1309                }
1310                if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1311                        xfs_alert(mp,
1312                "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
1313                                __func__, ino,
1314                                XFS_AGINO_TO_INO(mp, agno, agino));
1315                }
1316                xfs_stack_trace();
1317#endif /* DEBUG */
1318                return XFS_ERROR(EINVAL);
1319        }
1320
1321        blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1322
1323        /*
1324         * For bulkstat and handle lookups, we have an untrusted inode number
1325         * that we have to verify is valid. We cannot do this just by reading
1326         * the inode buffer as it may have been unlinked and removed leaving
1327         * inodes in stale state on disk. Hence we have to do a btree lookup
1328         * in all cases where an untrusted inode number is passed.
1329         */
1330        if (flags & XFS_IGET_UNTRUSTED) {
1331                error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1332                                        &chunk_agbno, &offset_agbno, flags);
1333                if (error)
1334                        return error;
1335                goto out_map;
1336        }
1337
1338        /*
1339         * If the inode cluster size is the same as the blocksize or
 1340         * smaller, we get to the buffer by simple arithmetic.
1341         */
1342        if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
1343                offset = XFS_INO_TO_OFFSET(mp, ino);
1344                ASSERT(offset < mp->m_sb.sb_inopblock);
1345
1346                imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
1347                imap->im_len = XFS_FSB_TO_BB(mp, 1);
1348                imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1349                return 0;
1350        }
1351
1352        /*
1353         * If the inode chunks are aligned then use simple maths to
1354         * find the location. Otherwise we have to do a btree
1355         * lookup to find the location.
1356         */
1357        if (mp->m_inoalign_mask) {
1358                offset_agbno = agbno & mp->m_inoalign_mask;
1359                chunk_agbno = agbno - offset_agbno;
1360        } else {
1361                error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1362                                        &chunk_agbno, &offset_agbno, flags);
1363                if (error)
1364                        return error;
1365        }
1366
1367out_map:
1368        ASSERT(agbno >= chunk_agbno);
1369        cluster_agbno = chunk_agbno +
1370                ((offset_agbno / blks_per_cluster) * blks_per_cluster);
1371        offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1372                XFS_INO_TO_OFFSET(mp, ino);
1373
1374        imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
1375        imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1376        imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1377
1378        /*
1379         * If the inode number maps to a block outside the bounds
1380         * of the file system then return an error rather than calling
1381         * read_buf and panicking when we get an error from the
1382         * driver.
1383         */
1384        if ((imap->im_blkno + imap->im_len) >
1385            XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1386                xfs_alert(mp,
1387        "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
1388                        __func__, (unsigned long long) imap->im_blkno,
1389                        (unsigned long long) imap->im_len,
1390                        XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1391                return XFS_ERROR(EINVAL);
1392        }
1393        return 0;
1394}
1395
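The out_map arithmetic above rounds agbno down to the start of its inode cluster and then converts the remaining distance into a byte offset within the cluster buffer (the untrusted path only differs in obtaining chunk_agbno from the inobt lookup first). Below is a minimal standalone sketch of that calculation; the 4k blocks, 256-byte inodes, 8k clusters and block numbers are illustrative assumptions, not values taken from the code above.

/* Standalone sketch, not kernel code. */
#include <assert.h>
#include <stdio.h>

int
main(void)
{
        unsigned int    blocklog = 12;          /* assumed 4096-byte blocks */
        unsigned int    inodelog = 8;           /* assumed 256-byte inodes */
        unsigned int    inopblock = 1U << (blocklog - inodelog);       /* 16 */
        unsigned int    blks_per_cluster = 8192 >> blocklog;           /* 2 */

        unsigned int    chunk_agbno = 100;      /* first block of inode chunk */
        unsigned int    agbno = 103;            /* block holding the inode */
        unsigned int    offset_agbno = agbno - chunk_agbno;
        unsigned int    ino_offset = 5;         /* inode index in its block */

        /* Round down to the cluster containing agbno. */
        unsigned int    cluster_agbno = chunk_agbno +
                (offset_agbno / blks_per_cluster) * blks_per_cluster;

        /* Inode index within the cluster, then byte offset into the buffer. */
        unsigned int    offset = (agbno - cluster_agbno) * inopblock + ino_offset;
        unsigned int    boffset = offset << inodelog;

        assert(cluster_agbno == 102);
        assert(offset == 21);                   /* 1 * 16 + 5 */
        printf("cluster_agbno %u, byte offset 0x%x\n", cluster_agbno, boffset);
        return 0;
}
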
1396/*
1397 * Compute and fill in value of m_in_maxlevels.
1398 */
1399void
1400xfs_ialloc_compute_maxlevels(
1401        xfs_mount_t     *mp)            /* file system mount structure */
1402{
1403        int             level;
1404        uint            maxblocks;
1405        uint            maxleafents;
1406        int             minleafrecs;
1407        int             minnoderecs;
1408
1409        maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
1410                XFS_INODES_PER_CHUNK_LOG;
1411        minleafrecs = mp->m_alloc_mnr[0];
1412        minnoderecs = mp->m_alloc_mnr[1];
1413        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1414        for (level = 1; maxblocks > 1; level++)
1415                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1416        mp->m_in_maxlevels = level;
1417}
1418
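xfs_ialloc_compute_maxlevels sizes the inode btree for the worst case in which every block holds only its minimum number of records: the leaf count is divided up level by level until a single root block remains. A sketch of the same calculation with assumed record counts follows; 2^20 leaf entries, 125 records per leaf and 250 per node are illustrative numbers only, not values from any real superblock.

/* Standalone sketch, not kernel code. */
#include <stdio.h>

static int
compute_maxlevels(
        unsigned long long      maxleafents,    /* records the leaves must hold */
        unsigned int            minleafrecs,    /* min records per leaf block */
        unsigned int            minnoderecs)    /* min keys/ptrs per node block */
{
        unsigned long long      maxblocks;
        int                     level;

        /* Worst case: every block holds only its minimum record count. */
        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
        for (level = 1; maxblocks > 1; level++)
                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
        return level;
}

int
main(void)
{
        /* 2^20 leaf records -> 8389 leaves -> 34 nodes -> 1 root: 3 levels */
        printf("maxlevels = %d\n",
                compute_maxlevels(1ULL << 20, 125, 250));
        return 0;
}
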
1419/*
1420 * Log specified fields for the ag hdr (inode section)
1421 */
1422void
1423xfs_ialloc_log_agi(
1424        xfs_trans_t     *tp,            /* transaction pointer */
1425        xfs_buf_t       *bp,            /* allocation group header buffer */
1426        int             fields)         /* bitmask of fields to log */
1427{
1428        int                     first;          /* first byte number */
1429        int                     last;           /* last byte number */
1430        static const short      offsets[] = {   /* field starting offsets */
1431                                        /* keep in sync with bit definitions */
1432                offsetof(xfs_agi_t, agi_magicnum),
1433                offsetof(xfs_agi_t, agi_versionnum),
1434                offsetof(xfs_agi_t, agi_seqno),
1435                offsetof(xfs_agi_t, agi_length),
1436                offsetof(xfs_agi_t, agi_count),
1437                offsetof(xfs_agi_t, agi_root),
1438                offsetof(xfs_agi_t, agi_level),
1439                offsetof(xfs_agi_t, agi_freecount),
1440                offsetof(xfs_agi_t, agi_newino),
1441                offsetof(xfs_agi_t, agi_dirino),
1442                offsetof(xfs_agi_t, agi_unlinked),
1443                sizeof(xfs_agi_t)
1444        };
1445#ifdef DEBUG
1446        xfs_agi_t               *agi;   /* allocation group header */
1447
1448        agi = XFS_BUF_TO_AGI(bp);
1449        ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1450#endif
1451        /*
1452         * Compute byte offsets for the first and last fields.
1453         */
1454        xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
1455        /*
1456         * Log the allocation group inode header buffer.
1457         */
1458        xfs_trans_log_buf(tp, bp, first, last);
1459}
1460
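xfs_btree_offsets() turns the caller's field bitmask and the offsets[] table above (whose final sizeof(xfs_agi_t) entry acts as a sentinel) into a single contiguous (first, last) byte range, which xfs_trans_log_buf then logs. The freestanding sketch below shows the same idea on a toy four-field structure; struct toy_hdr and toy_field_range are made-up names for illustration, not kernel code.

/* Standalone sketch, not kernel code. */
#include <stddef.h>
#include <stdio.h>

struct toy_hdr {
        unsigned int    magic;          /* field 0 */
        unsigned int    version;        /* field 1 */
        unsigned long   root;           /* field 2 */
        unsigned int    level;          /* field 3 */
};

static const short toy_offsets[] = {
        offsetof(struct toy_hdr, magic),
        offsetof(struct toy_hdr, version),
        offsetof(struct toy_hdr, root),
        offsetof(struct toy_hdr, level),
        sizeof(struct toy_hdr)          /* sentinel, like sizeof(xfs_agi_t) */
};

/* fields must have at least one bit set. */
static void
toy_field_range(
        unsigned int    fields,
        int             nbits,
        int             *first,
        int             *last)
{
        int             i;

        /* The lowest set field starts the range... */
        for (i = 0; i < nbits; i++)
                if (fields & (1U << i))
                        break;
        *first = toy_offsets[i];

        /* ...and it ends just before the field after the highest set one. */
        for (i = nbits - 1; i >= 0; i--)
                if (fields & (1U << i))
                        break;
        *last = toy_offsets[i + 1] - 1;
}

int
main(void)
{
        int     first, last;

        /* Logging fields 1 (version) and 2 (root) covers one contiguous span. */
        toy_field_range((1U << 1) | (1U << 2), 4, &first, &last);
        printf("log bytes %d..%d\n", first, last);
        return 0;
}
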
1461#ifdef DEBUG
1462STATIC void
1463xfs_check_agi_unlinked(
1464        struct xfs_agi          *agi)
1465{
1466        int                     i;
1467
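        /*
         * Empty unlinked buckets hold NULLAGINO (all ones), so a zero entry
         * would indicate an uninitialised or corrupt AGI rather than an
         * empty list.
         */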
1468        for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
1469                ASSERT(agi->agi_unlinked[i]);
1470}
1471#else
1472#define xfs_check_agi_unlinked(agi)
1473#endif
1474
1475/*
1476 * Read in the allocation group header (inode allocation section)
1477 */
1478int
1479xfs_read_agi(
1480        struct xfs_mount        *mp,    /* file system mount structure */
1481        struct xfs_trans        *tp,    /* transaction pointer */
1482        xfs_agnumber_t          agno,   /* allocation group number */
1483        struct xfs_buf          **bpp)  /* allocation group hdr buf */
1484{
1485        struct xfs_agi          *agi;   /* allocation group header */
1486        int                     agi_ok; /* agi is consistent */
1487        int                     error;
1488
1489        ASSERT(agno != NULLAGNUMBER);
1490
1491        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
1492                        XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
1493                        XFS_FSS_TO_BB(mp, 1), 0, bpp);
1494        if (error)
1495                return error;
1496
1497        ASSERT(!xfs_buf_geterror(*bpp));
1498        agi = XFS_BUF_TO_AGI(*bpp);
1499
1500        /*
1501         * Validate the magic number of the agi block.
1502         */
1503        agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
1504                XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1505                be32_to_cpu(agi->agi_seqno) == agno;
1506        if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1507                        XFS_RANDOM_IALLOC_READ_AGI))) {
1508                XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
1509                                     mp, agi);
1510                xfs_trans_brelse(tp, *bpp);
1511                return XFS_ERROR(EFSCORRUPTED);
1512        }
1513
1514        xfs_buf_set_ref(*bpp, XFS_AGI_REF);
1515
1516        xfs_check_agi_unlinked(agi);
1517        return 0;
1518}
1519
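xfs_read_agi only trusts the buffer after checking the magic number, the version, and that the header's own sequence number matches the AG that was asked for; a failure (or the injected test error) is reported as EFSCORRUPTED. A minimal sketch of that check in isolation follows; struct toy_agi and the TOY_* constants are stand-ins chosen for illustration rather than the kernel definitions.

/* Standalone sketch, not kernel code. */
#include <arpa/inet.h>          /* ntohl/htonl: on-disk fields are big-endian */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_AGI_MAGIC           0x58414749      /* "XAGI", assumed value */
#define TOY_AGI_VERSION         1               /* assumed good version */

struct toy_agi {
        uint32_t        magicnum;               /* big-endian on disk */
        uint32_t        versionnum;             /* big-endian on disk */
        uint32_t        seqno;                  /* big-endian on disk */
        /* ...remaining fields are not needed for this check... */
};

static bool
toy_agi_ok(
        const struct toy_agi    *agi,
        uint32_t                agno)
{
        return ntohl(agi->magicnum) == TOY_AGI_MAGIC &&
               ntohl(agi->versionnum) == TOY_AGI_VERSION &&
               ntohl(agi->seqno) == agno;
}

int
main(void)
{
        struct toy_agi  agi = {
                .magicnum       = htonl(TOY_AGI_MAGIC),
                .versionnum     = htonl(TOY_AGI_VERSION),
                .seqno          = htonl(3),
        };

        printf("agi ok for ag 3: %d\n", toy_agi_ok(&agi, 3));
        return 0;
}
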
1520int
1521xfs_ialloc_read_agi(
1522        struct xfs_mount        *mp,    /* file system mount structure */
1523        struct xfs_trans        *tp,    /* transaction pointer */
1524        xfs_agnumber_t          agno,   /* allocation group number */
1525        struct xfs_buf          **bpp)  /* allocation group hdr buf */
1526{
1527        struct xfs_agi          *agi;   /* allocation group header */
1528        struct xfs_perag        *pag;   /* per allocation group data */
1529        int                     error;
1530
1531        error = xfs_read_agi(mp, tp, agno, bpp);
1532        if (error)
1533                return error;
1534
1535        agi = XFS_BUF_TO_AGI(*bpp);
1536        pag = xfs_perag_get(mp, agno);
1537        if (!pag->pagi_init) {
1538                pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1539                pag->pagi_count = be32_to_cpu(agi->agi_count);
1540                pag->pagi_init = 1;
1541        }
1542
1543        /*
1544         * It's possible for these to be out of sync if
1545         * we are in the middle of a forced shutdown.
1546         */
1547        ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1548                XFS_FORCED_SHUTDOWN(mp));
1549        xfs_perag_put(pag);
1550        return 0;
1551}
1552
1553/*
1554 * Read in the agi to initialise the per-ag data in the mount structure
1555 */
1556int
1557xfs_ialloc_pagi_init(
1558        xfs_mount_t     *mp,            /* file system mount structure */
1559        xfs_trans_t     *tp,            /* transaction pointer */
1560        xfs_agnumber_t  agno)           /* allocation group number */
1561{
1562        xfs_buf_t       *bp = NULL;
1563        int             error;
1564
1565        error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1566        if (error)
1567                return error;
1568        if (bp)
1569                xfs_trans_brelse(tp, bp);
1570        return 0;
1571}
1572