linux/fs/xfs/xfs_ialloc.c
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_inum.h"
  24#include "xfs_trans.h"
  25#include "xfs_sb.h"
  26#include "xfs_ag.h"
  27#include "xfs_mount.h"
  28#include "xfs_bmap_btree.h"
  29#include "xfs_alloc_btree.h"
  30#include "xfs_ialloc_btree.h"
  31#include "xfs_dinode.h"
  32#include "xfs_inode.h"
  33#include "xfs_btree.h"
  34#include "xfs_ialloc.h"
  35#include "xfs_alloc.h"
  36#include "xfs_rtalloc.h"
  37#include "xfs_error.h"
  38#include "xfs_bmap.h"
  39
  40
  41/*
  42 * Allocation group level functions.
  43 */
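/*
 * Descriptive note (added): return the alignment, in filesystem blocks, to
 * use for inode cluster allocations.  If the superblock has inode alignment
 * enabled and that alignment covers at least a full inode cluster, use it;
 * otherwise no extra alignment is required and 1 is returned.
 */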
  44static inline int
  45xfs_ialloc_cluster_alignment(
  46        xfs_alloc_arg_t *args)
  47{
  48        if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
  49            args->mp->m_sb.sb_inoalignmt >=
  50             XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
  51                return args->mp->m_sb.sb_inoalignmt;
  52        return 1;
  53}
  54
  55/*
  56 * Lookup a record by ino in the btree given by cur.
  57 */
  58int                                     /* error */
  59xfs_inobt_lookup(
  60        struct xfs_btree_cur    *cur,   /* btree cursor */
  61        xfs_agino_t             ino,    /* starting inode of chunk */
  62        xfs_lookup_t            dir,    /* <=, >=, == */
  63        int                     *stat)  /* success/failure */
  64{
  65        cur->bc_rec.i.ir_startino = ino;
  66        cur->bc_rec.i.ir_freecount = 0;
  67        cur->bc_rec.i.ir_free = 0;
  68        return xfs_btree_lookup(cur, dir, stat);
  69}
  70
  71/*
  72 * Update the record referred to by cur to the value given.
  73 * This either works (return 0) or gets an EFSCORRUPTED error.
  74 */
  75STATIC int                              /* error */
  76xfs_inobt_update(
  77        struct xfs_btree_cur    *cur,   /* btree cursor */
  78        xfs_inobt_rec_incore_t  *irec)  /* btree record */
  79{
  80        union xfs_btree_rec     rec;
  81
  82        rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
  83        rec.inobt.ir_freecount = cpu_to_be32(irec->ir_freecount);
  84        rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
  85        return xfs_btree_update(cur, &rec);
  86}
  87
  88/*
  89 * Get the data from the pointed-to record.
  90 */
  91int                                     /* error */
  92xfs_inobt_get_rec(
  93        struct xfs_btree_cur    *cur,   /* btree cursor */
  94        xfs_inobt_rec_incore_t  *irec,  /* btree record */
  95        int                     *stat)  /* output: success/failure */
  96{
  97        union xfs_btree_rec     *rec;
  98        int                     error;
  99
 100        error = xfs_btree_get_rec(cur, &rec, stat);
 101        if (!error && *stat == 1) {
 102                irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
 103                irec->ir_freecount = be32_to_cpu(rec->inobt.ir_freecount);
 104                irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
 105        }
 106        return error;
 107}
 108
 109/*
 110 * Verify that the number of free inodes in the AGI is correct.
 111 */
 112#ifdef DEBUG
 113STATIC int
 114xfs_check_agi_freecount(
 115        struct xfs_btree_cur    *cur,
 116        struct xfs_agi          *agi)
 117{
 118        if (cur->bc_nlevels == 1) {
 119                xfs_inobt_rec_incore_t rec;
 120                int             freecount = 0;
 121                int             error;
 122                int             i;
 123
 124                error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
 125                if (error)
 126                        return error;
 127
 128                do {
 129                        error = xfs_inobt_get_rec(cur, &rec, &i);
 130                        if (error)
 131                                return error;
 132
 133                        if (i) {
 134                                freecount += rec.ir_freecount;
 135                                error = xfs_btree_increment(cur, 0, &i);
 136                                if (error)
 137                                        return error;
 138                        }
 139                } while (i == 1);
 140
 141                if (!XFS_FORCED_SHUTDOWN(cur->bc_mp))
 142                        ASSERT(freecount == be32_to_cpu(agi->agi_freecount));
 143        }
 144        return 0;
 145}
 146#else
 147#define xfs_check_agi_freecount(cur, agi)       0
 148#endif
 149
 150/*
 151 * Initialise a new set of inodes.
 152 */
 153STATIC int
 154xfs_ialloc_inode_init(
 155        struct xfs_mount        *mp,
 156        struct xfs_trans        *tp,
 157        xfs_agnumber_t          agno,
 158        xfs_agblock_t           agbno,
 159        xfs_agblock_t           length,
 160        unsigned int            gen)
 161{
 162        struct xfs_buf          *fbuf;
 163        struct xfs_dinode       *free;
 164        int                     blks_per_cluster, nbufs, ninodes;
 165        int                     version;
 166        int                     i, j;
 167        xfs_daddr_t             d;
 168
 169        /*
 170         * Loop over the new block(s), filling in the inodes.
 171         * For small block sizes, manipulate the inodes in buffers
  172         * which are multiples of the block size.
 173         */
 174        if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
 175                blks_per_cluster = 1;
 176                nbufs = length;
 177                ninodes = mp->m_sb.sb_inopblock;
 178        } else {
 179                blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
 180                                   mp->m_sb.sb_blocksize;
 181                nbufs = length / blks_per_cluster;
 182                ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
 183        }
 184
 185        /*
 186         * Figure out what version number to use in the inodes we create.
 187         * If the superblock version has caught up to the one that supports
 188         * the new inode format, then use the new inode version.  Otherwise
 189         * use the old version so that old kernels will continue to be
 190         * able to use the file system.
 191         */
 192        if (xfs_sb_version_hasnlink(&mp->m_sb))
 193                version = 2;
 194        else
 195                version = 1;
 196
 197        for (j = 0; j < nbufs; j++) {
 198                /*
 199                 * Get the block.
 200                 */
 201                d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
 202                fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
 203                                         mp->m_bsize * blks_per_cluster, 0);
 204                if (!fbuf)
 205                        return ENOMEM;
 206                /*
 207                 * Initialize all inodes in this buffer and then log them.
 208                 *
 209                 * XXX: It would be much better if we had just one transaction
 210                 *      to log a whole cluster of inodes instead of all the
 211                 *      individual transactions causing a lot of log traffic.
 212                 */
 213                xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
 214                for (i = 0; i < ninodes; i++) {
 215                        int     ioffset = i << mp->m_sb.sb_inodelog;
 216                        uint    isize = sizeof(struct xfs_dinode);
 217
 218                        free = xfs_make_iptr(mp, fbuf, i);
 219                        free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
 220                        free->di_version = version;
 221                        free->di_gen = cpu_to_be32(gen);
 222                        free->di_next_unlinked = cpu_to_be32(NULLAGINO);
 223                        xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1);
 224                }
 225                xfs_trans_inode_alloc_buf(tp, fbuf);
 226        }
 227        return 0;
 228}
 229
 230/*
 231 * Allocate new inodes in the allocation group specified by agbp.
 232 * Return 0 for success, else error code.
 233 */
 234STATIC int                              /* error code or 0 */
 235xfs_ialloc_ag_alloc(
 236        xfs_trans_t     *tp,            /* transaction pointer */
 237        xfs_buf_t       *agbp,          /* alloc group buffer */
 238        int             *alloc)
 239{
 240        xfs_agi_t       *agi;           /* allocation group header */
 241        xfs_alloc_arg_t args;           /* allocation argument structure */
 242        xfs_btree_cur_t *cur;           /* inode btree cursor */
 243        xfs_agnumber_t  agno;
 244        int             error;
 245        int             i;
 246        xfs_agino_t     newino;         /* new first inode's number */
 247        xfs_agino_t     newlen;         /* new number of inodes */
 248        xfs_agino_t     thisino;        /* current inode number, for loop */
 249        int             isaligned = 0;  /* inode allocation at stripe unit */
 250                                        /* boundary */
 251        struct xfs_perag *pag;
 252
 253        args.tp = tp;
 254        args.mp = tp->t_mountp;
 255
 256        /*
 257         * Locking will ensure that we don't have two callers in here
 258         * at one time.
 259         */
 260        newlen = XFS_IALLOC_INODES(args.mp);
 261        if (args.mp->m_maxicount &&
 262            args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
 263                return XFS_ERROR(ENOSPC);
 264        args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
 265        /*
 266         * First try to allocate inodes contiguous with the last-allocated
 267         * chunk of inodes.  If the filesystem is striped, this will fill
 268         * an entire stripe unit with inodes.
 269         */
 270        agi = XFS_BUF_TO_AGI(agbp);
 271        newino = be32_to_cpu(agi->agi_newino);
 272        agno = be32_to_cpu(agi->agi_seqno);
 273        args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
 274                        XFS_IALLOC_BLOCKS(args.mp);
 275        if (likely(newino != NULLAGINO &&
 276                  (args.agbno < be32_to_cpu(agi->agi_length)))) {
 277                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
 278                args.type = XFS_ALLOCTYPE_THIS_BNO;
 279                args.mod = args.total = args.wasdel = args.isfl =
 280                        args.userdata = args.minalignslop = 0;
 281                args.prod = 1;
 282
 283                /*
 284                 * We need to take into account alignment here to ensure that
 285                 * we don't modify the free list if we fail to have an exact
  286         * block. If we don't have an exact match, and every other
  287         * allocation attempt fails, we'll end up cancelling
 288                 * a dirty transaction and shutting down.
 289                 *
 290                 * For an exact allocation, alignment must be 1,
 291                 * however we need to take cluster alignment into account when
 292                 * fixing up the freelist. Use the minalignslop field to
 293                 * indicate that extra blocks might be required for alignment,
 294                 * but not to use them in the actual exact allocation.
 295                 */
 296                args.alignment = 1;
 297                args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
 298
 299                /* Allow space for the inode btree to split. */
 300                args.minleft = args.mp->m_in_maxlevels - 1;
 301                if ((error = xfs_alloc_vextent(&args)))
 302                        return error;
 303        } else
 304                args.fsbno = NULLFSBLOCK;
 305
 306        if (unlikely(args.fsbno == NULLFSBLOCK)) {
 307                /*
 308                 * Set the alignment for the allocation.
 309                 * If stripe alignment is turned on then align at stripe unit
 310                 * boundary.
 311                 * If the cluster size is smaller than a filesystem block
 312                 * then we're doing I/O for inodes in filesystem block size
 313                 * pieces, so don't need alignment anyway.
 314                 */
 315                isaligned = 0;
 316                if (args.mp->m_sinoalign) {
 317                        ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
 318                        args.alignment = args.mp->m_dalign;
 319                        isaligned = 1;
 320                } else
 321                        args.alignment = xfs_ialloc_cluster_alignment(&args);
 322                /*
 323                 * Need to figure out where to allocate the inode blocks.
 324                 * Ideally they should be spaced out through the a.g.
 325                 * For now, just allocate blocks up front.
 326                 */
 327                args.agbno = be32_to_cpu(agi->agi_root);
 328                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
 329                /*
 330                 * Allocate a fixed-size extent of inodes.
 331                 */
 332                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 333                args.mod = args.total = args.wasdel = args.isfl =
 334                        args.userdata = args.minalignslop = 0;
 335                args.prod = 1;
 336                /*
 337                 * Allow space for the inode btree to split.
 338                 */
 339                args.minleft = args.mp->m_in_maxlevels - 1;
 340                if ((error = xfs_alloc_vextent(&args)))
 341                        return error;
 342        }
 343
 344        /*
 345         * If stripe alignment is turned on, then try again with cluster
 346         * alignment.
 347         */
 348        if (isaligned && args.fsbno == NULLFSBLOCK) {
 349                args.type = XFS_ALLOCTYPE_NEAR_BNO;
 350                args.agbno = be32_to_cpu(agi->agi_root);
 351                args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
 352                args.alignment = xfs_ialloc_cluster_alignment(&args);
 353                if ((error = xfs_alloc_vextent(&args)))
 354                        return error;
 355        }
 356
 357        if (args.fsbno == NULLFSBLOCK) {
 358                *alloc = 0;
 359                return 0;
 360        }
 361        ASSERT(args.len == args.minlen);
 362
 363        /*
 364         * Stamp and write the inode buffers.
 365         *
 366         * Seed the new inode cluster with a random generation number. This
 367         * prevents short-term reuse of generation numbers if a chunk is
 368         * freed and then immediately reallocated. We use random numbers
 369         * rather than a linear progression to prevent the next generation
 370         * number from being easily guessable.
 371         */
 372        error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
 373                        args.len, random32());
 374
 375        if (error)
 376                return error;
 377        /*
 378         * Convert the results.
 379         */
 380        newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
 381        be32_add_cpu(&agi->agi_count, newlen);
 382        be32_add_cpu(&agi->agi_freecount, newlen);
 383        pag = xfs_perag_get(args.mp, agno);
 384        pag->pagi_freecount += newlen;
 385        xfs_perag_put(pag);
 386        agi->agi_newino = cpu_to_be32(newino);
 387
 388        /*
 389         * Insert records describing the new inode chunk into the btree.
 390         */
 391        cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
 392        for (thisino = newino;
 393             thisino < newino + newlen;
 394             thisino += XFS_INODES_PER_CHUNK) {
 395                cur->bc_rec.i.ir_startino = thisino;
 396                cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK;
 397                cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE;
 398                error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i);
 399                if (error) {
 400                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 401                        return error;
 402                }
 403                ASSERT(i == 0);
 404                error = xfs_btree_insert(cur, &i);
 405                if (error) {
 406                        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 407                        return error;
 408                }
 409                ASSERT(i == 1);
 410        }
 411        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 412        /*
 413         * Log allocation group header fields
 414         */
 415        xfs_ialloc_log_agi(tp, agbp,
 416                XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
 417        /*
 418         * Modify/log superblock values for inode count and inode free count.
 419         */
 420        xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
 421        xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
 422        *alloc = 1;
 423        return 0;
 424}
 425
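/*
 * Descriptive note (added): advance and return a simple round-robin rotor,
 * protected by m_agirotor_lock, so that new directories are spread across
 * the filesystem's allocation groups.
 */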
 426STATIC xfs_agnumber_t
 427xfs_ialloc_next_ag(
 428        xfs_mount_t     *mp)
 429{
 430        xfs_agnumber_t  agno;
 431
 432        spin_lock(&mp->m_agirotor_lock);
 433        agno = mp->m_agirotor;
 434        if (++mp->m_agirotor == mp->m_maxagi)
 435                mp->m_agirotor = 0;
 436        spin_unlock(&mp->m_agirotor_lock);
 437
 438        return agno;
 439}
 440
 441/*
 442 * Select an allocation group to look for a free inode in, based on the parent
  443 * inode and the mode.  Return the allocation group number.
 444 */
 445STATIC xfs_agnumber_t
 446xfs_ialloc_ag_select(
 447        xfs_trans_t     *tp,            /* transaction pointer */
 448        xfs_ino_t       parent,         /* parent directory inode number */
 449        umode_t         mode,           /* bits set to indicate file type */
 450        int             okalloc)        /* ok to allocate more space */
 451{
 452        xfs_agnumber_t  agcount;        /* number of ag's in the filesystem */
 453        xfs_agnumber_t  agno;           /* current ag number */
 454        int             flags;          /* alloc buffer locking flags */
 455        xfs_extlen_t    ineed;          /* blocks needed for inode allocation */
 456        xfs_extlen_t    longest = 0;    /* longest extent available */
 457        xfs_mount_t     *mp;            /* mount point structure */
 458        int             needspace;      /* file mode implies space allocated */
 459        xfs_perag_t     *pag;           /* per allocation group data */
 460        xfs_agnumber_t  pagno;          /* parent (starting) ag number */
 461        int             error;
 462
 463        /*
 464         * Files of these types need at least one block if length > 0
 465         * (and they won't fit in the inode, but that's hard to figure out).
 466         */
 467        needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
 468        mp = tp->t_mountp;
 469        agcount = mp->m_maxagi;
 470        if (S_ISDIR(mode))
 471                pagno = xfs_ialloc_next_ag(mp);
 472        else {
 473                pagno = XFS_INO_TO_AGNO(mp, parent);
 474                if (pagno >= agcount)
 475                        pagno = 0;
 476        }
 477
 478        ASSERT(pagno < agcount);
 479
 480        /*
 481         * Loop through allocation groups, looking for one with a little
  482         * free space in it.  Note we don't look only for free inodes;
  483         * if none are currently free, we also check whether the group
  484         * has enough free space to allocate the blocks needed for a new
  485         * chunk of inodes.
 486         */
 487        agno = pagno;
 488        flags = XFS_ALLOC_FLAG_TRYLOCK;
 489        for (;;) {
 490                pag = xfs_perag_get(mp, agno);
 491                if (!pag->pagi_inodeok) {
 492                        xfs_ialloc_next_ag(mp);
 493                        goto nextag;
 494                }
 495
 496                if (!pag->pagi_init) {
 497                        error = xfs_ialloc_pagi_init(mp, tp, agno);
 498                        if (error)
 499                                goto nextag;
 500                }
 501
 502                if (pag->pagi_freecount) {
 503                        xfs_perag_put(pag);
 504                        return agno;
 505                }
 506
 507                if (!okalloc)
 508                        goto nextag;
 509
 510                if (!pag->pagf_init) {
 511                        error = xfs_alloc_pagf_init(mp, tp, agno, flags);
 512                        if (error)
 513                                goto nextag;
 514                }
 515
 516                /*
 517                 * Is there enough free space for the file plus a block of
  518                 * inodes (if we need to allocate some)?
 519                 */
 520                ineed = XFS_IALLOC_BLOCKS(mp);
 521                longest = pag->pagf_longest;
 522                if (!longest)
 523                        longest = pag->pagf_flcount > 0;
 524
 525                if (pag->pagf_freeblks >= needspace + ineed &&
 526                    longest >= ineed) {
 527                        xfs_perag_put(pag);
 528                        return agno;
 529                }
 530nextag:
 531                xfs_perag_put(pag);
 532                /*
 533                 * No point in iterating over the rest, if we're shutting
 534                 * down.
 535                 */
 536                if (XFS_FORCED_SHUTDOWN(mp))
 537                        return NULLAGNUMBER;
 538                agno++;
 539                if (agno >= agcount)
 540                        agno = 0;
 541                if (agno == pagno) {
 542                        if (flags == 0)
 543                                return NULLAGNUMBER;
 544                        flags = 0;
 545                }
 546        }
 547}
 548
 549/*
 550 * Try to retrieve the next record to the left/right from the current one.
 551 */
 552STATIC int
 553xfs_ialloc_next_rec(
 554        struct xfs_btree_cur    *cur,
 555        xfs_inobt_rec_incore_t  *rec,
 556        int                     *done,
 557        int                     left)
 558{
 559        int                     error;
 560        int                     i;
 561
 562        if (left)
 563                error = xfs_btree_decrement(cur, 0, &i);
 564        else
 565                error = xfs_btree_increment(cur, 0, &i);
 566
 567        if (error)
 568                return error;
 569        *done = !i;
 570        if (i) {
 571                error = xfs_inobt_get_rec(cur, rec, &i);
 572                if (error)
 573                        return error;
 574                XFS_WANT_CORRUPTED_RETURN(i == 1);
 575        }
 576
 577        return 0;
 578}
 579
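/*
 * Descriptive note (added): look up the inode chunk record starting exactly
 * at agino.  Used to resume a left/right search from the record locations
 * cached in the per-AG data (pagl_leftrec/pagl_rightrec).
 */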
 580STATIC int
 581xfs_ialloc_get_rec(
 582        struct xfs_btree_cur    *cur,
 583        xfs_agino_t             agino,
 584        xfs_inobt_rec_incore_t  *rec,
 585        int                     *done,
 586        int                     left)
 587{
 588        int                     error;
 589        int                     i;
 590
 591        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
 592        if (error)
 593                return error;
 594        *done = !i;
 595        if (i) {
 596                error = xfs_inobt_get_rec(cur, rec, &i);
 597                if (error)
 598                        return error;
 599                XFS_WANT_CORRUPTED_RETURN(i == 1);
 600        }
 601
 602        return 0;
 603}
 604
 605/*
 606 * Allocate an inode.
 607 *
 608 * The caller selected an AG for us, and made sure that free inodes are
 609 * available.
 610 */
 611STATIC int
 612xfs_dialloc_ag(
 613        struct xfs_trans        *tp,
 614        struct xfs_buf          *agbp,
 615        xfs_ino_t               parent,
 616        xfs_ino_t               *inop)
 617{
 618        struct xfs_mount        *mp = tp->t_mountp;
 619        struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
 620        xfs_agnumber_t          agno = be32_to_cpu(agi->agi_seqno);
 621        xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
 622        xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
 623        struct xfs_perag        *pag;
 624        struct xfs_btree_cur    *cur, *tcur;
 625        struct xfs_inobt_rec_incore rec, trec;
 626        xfs_ino_t               ino;
 627        int                     error;
 628        int                     offset;
 629        int                     i, j;
 630
 631        pag = xfs_perag_get(mp, agno);
 632
 633        ASSERT(pag->pagi_init);
 634        ASSERT(pag->pagi_inodeok);
 635        ASSERT(pag->pagi_freecount > 0);
 636
 637 restart_pagno:
 638        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
 639        /*
 640         * If pagino is 0 (this is the root inode allocation) use newino.
 641         * This must work because we've just allocated some.
 642         */
 643        if (!pagino)
 644                pagino = be32_to_cpu(agi->agi_newino);
 645
 646        error = xfs_check_agi_freecount(cur, agi);
 647        if (error)
 648                goto error0;
 649
 650        /*
 651         * If in the same AG as the parent, try to get near the parent.
 652         */
 653        if (pagno == agno) {
 654                int             doneleft;       /* done, to the left */
 655                int             doneright;      /* done, to the right */
 656                int             searchdistance = 10;
 657
 658                error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
 659                if (error)
 660                        goto error0;
 661                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 662
 663                error = xfs_inobt_get_rec(cur, &rec, &j);
 664                if (error)
 665                        goto error0;
  666                XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
 667
 668                if (rec.ir_freecount > 0) {
 669                        /*
 670                         * Found a free inode in the same chunk
 671                         * as the parent, done.
 672                         */
 673                        goto alloc_inode;
 674                }
 675
 676
 677                /*
 678                 * In the same AG as parent, but parent's chunk is full.
 679                 */
 680
 681                /* duplicate the cursor, search left & right simultaneously */
 682                error = xfs_btree_dup_cursor(cur, &tcur);
 683                if (error)
 684                        goto error0;
 685
 686                /*
 687                 * Skip to last blocks looked up if same parent inode.
 688                 */
 689                if (pagino != NULLAGINO &&
 690                    pag->pagl_pagino == pagino &&
 691                    pag->pagl_leftrec != NULLAGINO &&
 692                    pag->pagl_rightrec != NULLAGINO) {
 693                        error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
 694                                                   &trec, &doneleft, 1);
 695                        if (error)
 696                                goto error1;
 697
 698                        error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
 699                                                   &rec, &doneright, 0);
 700                        if (error)
 701                                goto error1;
 702                } else {
 703                        /* search left with tcur, back up 1 record */
 704                        error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
 705                        if (error)
 706                                goto error1;
 707
 708                        /* search right with cur, go forward 1 record. */
 709                        error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
 710                        if (error)
 711                                goto error1;
 712                }
 713
 714                /*
 715                 * Loop until we find an inode chunk with a free inode.
 716                 */
 717                while (!doneleft || !doneright) {
 718                        int     useleft;  /* using left inode chunk this time */
 719
 720                        if (!--searchdistance) {
 721                                /*
 722                                 * Not in range - save last search
 723                                 * location and allocate a new inode
 724                                 */
 725                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 726                                pag->pagl_leftrec = trec.ir_startino;
 727                                pag->pagl_rightrec = rec.ir_startino;
 728                                pag->pagl_pagino = pagino;
 729                                goto newino;
 730                        }
 731
 732                        /* figure out the closer block if both are valid. */
 733                        if (!doneleft && !doneright) {
 734                                useleft = pagino -
 735                                 (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
 736                                  rec.ir_startino - pagino;
 737                        } else {
 738                                useleft = !doneleft;
 739                        }
 740
 741                        /* free inodes to the left? */
 742                        if (useleft && trec.ir_freecount) {
 743                                rec = trec;
 744                                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 745                                cur = tcur;
 746
 747                                pag->pagl_leftrec = trec.ir_startino;
 748                                pag->pagl_rightrec = rec.ir_startino;
 749                                pag->pagl_pagino = pagino;
 750                                goto alloc_inode;
 751                        }
 752
 753                        /* free inodes to the right? */
 754                        if (!useleft && rec.ir_freecount) {
 755                                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 756
 757                                pag->pagl_leftrec = trec.ir_startino;
 758                                pag->pagl_rightrec = rec.ir_startino;
 759                                pag->pagl_pagino = pagino;
 760                                goto alloc_inode;
 761                        }
 762
 763                        /* get next record to check */
 764                        if (useleft) {
 765                                error = xfs_ialloc_next_rec(tcur, &trec,
 766                                                                 &doneleft, 1);
 767                        } else {
 768                                error = xfs_ialloc_next_rec(cur, &rec,
 769                                                                 &doneright, 0);
 770                        }
 771                        if (error)
 772                                goto error1;
 773                }
 774
 775                /*
  776                 * We've reached the end of the btree. Because
  777                 * we are only searching a small chunk of the
  778                 * btree each search, there are obviously free
  779                 * inodes closer to the parent inode than we
  780                 * are now. Restart the search again.
 781                 */
 782                pag->pagl_pagino = NULLAGINO;
 783                pag->pagl_leftrec = NULLAGINO;
 784                pag->pagl_rightrec = NULLAGINO;
 785                xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
 786                xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 787                goto restart_pagno;
 788        }
 789
 790        /*
 791         * In a different AG from the parent.
 792         * See if the most recently allocated block has any free.
 793         */
 794newino:
 795        if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
 796                error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
 797                                         XFS_LOOKUP_EQ, &i);
 798                if (error)
 799                        goto error0;
 800
 801                if (i == 1) {
 802                        error = xfs_inobt_get_rec(cur, &rec, &j);
 803                        if (error)
 804                                goto error0;
 805
 806                        if (j == 1 && rec.ir_freecount > 0) {
 807                                /*
 808                                 * The last chunk allocated in the group
 809                                 * still has a free inode.
 810                                 */
 811                                goto alloc_inode;
 812                        }
 813                }
 814        }
 815
 816        /*
 817         * None left in the last group, search the whole AG
 818         */
 819        error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
 820        if (error)
 821                goto error0;
 822        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 823
 824        for (;;) {
 825                error = xfs_inobt_get_rec(cur, &rec, &i);
 826                if (error)
 827                        goto error0;
 828                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 829                if (rec.ir_freecount > 0)
 830                        break;
 831                error = xfs_btree_increment(cur, 0, &i);
 832                if (error)
 833                        goto error0;
 834                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 835        }
 836
 837alloc_inode:
 838        offset = xfs_lowbit64(rec.ir_free);
 839        ASSERT(offset >= 0);
 840        ASSERT(offset < XFS_INODES_PER_CHUNK);
 841        ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
 842                                   XFS_INODES_PER_CHUNK) == 0);
 843        ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
 844        rec.ir_free &= ~XFS_INOBT_MASK(offset);
 845        rec.ir_freecount--;
 846        error = xfs_inobt_update(cur, &rec);
 847        if (error)
 848                goto error0;
 849        be32_add_cpu(&agi->agi_freecount, -1);
 850        xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
 851        pag->pagi_freecount--;
 852
 853        error = xfs_check_agi_freecount(cur, agi);
 854        if (error)
 855                goto error0;
 856
 857        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 858        xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
 859        xfs_perag_put(pag);
 860        *inop = ino;
 861        return 0;
 862error1:
 863        xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
 864error0:
 865        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
 866        xfs_perag_put(pag);
 867        return error;
 868}
 869
 870/*
 871 * Allocate an inode on disk.
 872 *
 873 * Mode is used to tell whether the new inode will need space, and whether it
 874 * is a directory.
 875 *
 876 * This function is designed to be called twice if it has to do an allocation
 877 * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
  878 * If an inode is available without having to perform an allocation, an inode
  879 * number is returned.  In this case, *IO_agbp would be NULL.  If an allocation
  880 * needs to be done, xfs_dialloc would return the current AGI buffer in
 881 * *IO_agbp.  The caller should then commit the current transaction, allocate a
 882 * new transaction, and call xfs_dialloc() again, passing in the previous value
 883 * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
 884 * buffer is locked across the two calls, the second call is guaranteed to have
 885 * a free inode available.
 886 *
 887 * Once we successfully pick an inode its number is returned and the on-disk
 888 * data structures are updated.  The inode itself is not read in, since doing so
 889 * would break ordering constraints with xfs_reclaim.
 890 */
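/*
 * Illustrative calling pattern only (added sketch, not part of this file);
 * names and transaction handling are simplified, see the comment above for
 * the actual contract:
 *
 *	struct xfs_buf	*agbp = NULL;
 *	xfs_ino_t	ino = NULLFSINO;
 *
 *	error = xfs_dialloc(tp, parent, mode, okalloc, &agbp, &ino);
 *	if (!error && agbp != NULL) {
 *		commit tp, allocate a new transaction ntp, then retry:
 *		error = xfs_dialloc(ntp, parent, mode, okalloc, &agbp, &ino);
 *	}
 */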
 891int
 892xfs_dialloc(
 893        struct xfs_trans        *tp,
 894        xfs_ino_t               parent,
 895        umode_t                 mode,
 896        int                     okalloc,
 897        struct xfs_buf          **IO_agbp,
 898        xfs_ino_t               *inop)
 899{
 900        struct xfs_mount        *mp = tp->t_mountp;
 901        struct xfs_buf          *agbp;
 902        xfs_agnumber_t          agno;
 903        int                     error;
 904        int                     ialloced;
 905        int                     noroom = 0;
 906        xfs_agnumber_t          start_agno;
 907        struct xfs_perag        *pag;
 908
 909        if (*IO_agbp) {
 910                /*
 911                 * If the caller passes in a pointer to the AGI buffer,
 912                 * continue where we left off before.  In this case, we
 913                 * know that the allocation group has free inodes.
 914                 */
 915                agbp = *IO_agbp;
 916                goto out_alloc;
 917        }
 918
 919        /*
 920         * We do not have an agbp, so select an initial allocation
 921         * group for inode allocation.
 922         */
 923        start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
 924        if (start_agno == NULLAGNUMBER) {
 925                *inop = NULLFSINO;
 926                return 0;
 927        }
 928
 929        /*
 930         * If we have already hit the ceiling of inode blocks then clear
 931         * okalloc so we scan all available agi structures for a free
 932         * inode.
 933         */
 934        if (mp->m_maxicount &&
 935            mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
 936                noroom = 1;
 937                okalloc = 0;
 938        }
 939
 940        /*
 941         * Loop until we find an allocation group that either has free inodes
 942         * or in which we can allocate some inodes.  Iterate through the
 943         * allocation groups upward, wrapping at the end.
 944         */
 945        agno = start_agno;
 946        for (;;) {
 947                pag = xfs_perag_get(mp, agno);
 948                if (!pag->pagi_inodeok) {
 949                        xfs_ialloc_next_ag(mp);
 950                        goto nextag;
 951                }
 952
 953                if (!pag->pagi_init) {
 954                        error = xfs_ialloc_pagi_init(mp, tp, agno);
 955                        if (error)
 956                                goto out_error;
 957                }
 958
 959                /*
 960                 * Do a first racy fast path check if this AG is usable.
 961                 */
 962                if (!pag->pagi_freecount && !okalloc)
 963                        goto nextag;
 964
 965                /*
 966                 * Then read in the AGI buffer and recheck with the AGI buffer
 967                 * lock held.
 968                 */
 969                error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
 970                if (error)
 971                        goto out_error;
 972
 973                if (pag->pagi_freecount) {
 974                        xfs_perag_put(pag);
 975                        goto out_alloc;
 976                }
 977
 978                if (!okalloc)
 979                        goto nextag_relse_buffer;
 980
 981
 982                error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
 983                if (error) {
 984                        xfs_trans_brelse(tp, agbp);
 985
 986                        if (error != ENOSPC)
 987                                goto out_error;
 988
 989                        xfs_perag_put(pag);
 990                        *inop = NULLFSINO;
 991                        return 0;
 992                }
 993
 994                if (ialloced) {
 995                        /*
 996                         * We successfully allocated some inodes, return
 997                         * the current context to the caller so that it
 998                         * can commit the current transaction and call
 999                         * us again where we left off.
1000                         */
1001                        ASSERT(pag->pagi_freecount > 0);
1002                        xfs_perag_put(pag);
1003
1004                        *IO_agbp = agbp;
1005                        *inop = NULLFSINO;
1006                        return 0;
1007                }
1008
1009nextag_relse_buffer:
1010                xfs_trans_brelse(tp, agbp);
1011nextag:
1012                xfs_perag_put(pag);
1013                if (++agno == mp->m_sb.sb_agcount)
1014                        agno = 0;
1015                if (agno == start_agno) {
1016                        *inop = NULLFSINO;
1017                        return noroom ? ENOSPC : 0;
1018                }
1019        }
1020
1021out_alloc:
1022        *IO_agbp = NULL;
1023        return xfs_dialloc_ag(tp, agbp, parent, inop);
1024out_error:
1025        xfs_perag_put(pag);
1026        return XFS_ERROR(error);
1027}
1028
1029/*
1030 * Free disk inode.  Carefully avoids touching the incore inode, all
1031 * manipulations incore are the caller's responsibility.
1032 * The on-disk inode is not changed by this operation, only the
1033 * btree (free inode mask) is changed.
1034 */
1035int
1036xfs_difree(
1037        xfs_trans_t     *tp,            /* transaction pointer */
1038        xfs_ino_t       inode,          /* inode to be freed */
1039        xfs_bmap_free_t *flist,         /* extents to free */
1040        int             *delete,        /* set if inode cluster was deleted */
1041        xfs_ino_t       *first_ino)     /* first inode in deleted cluster */
1042{
1043        /* REFERENCED */
1044        xfs_agblock_t   agbno;  /* block number containing inode */
1045        xfs_buf_t       *agbp;  /* buffer containing allocation group header */
1046        xfs_agino_t     agino;  /* inode number relative to allocation group */
1047        xfs_agnumber_t  agno;   /* allocation group number */
1048        xfs_agi_t       *agi;   /* allocation group header */
1049        xfs_btree_cur_t *cur;   /* inode btree cursor */
1050        int             error;  /* error return value */
1051        int             i;      /* result code */
1052        int             ilen;   /* inodes in an inode cluster */
1053        xfs_mount_t     *mp;    /* mount structure for filesystem */
1054        int             off;    /* offset of inode in inode chunk */
1055        xfs_inobt_rec_incore_t rec;     /* btree record */
1056        struct xfs_perag *pag;
1057
1058        mp = tp->t_mountp;
1059
1060        /*
1061         * Break up inode number into its components.
1062         */
1063        agno = XFS_INO_TO_AGNO(mp, inode);
1064        if (agno >= mp->m_sb.sb_agcount)  {
1065                xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1066                        __func__, agno, mp->m_sb.sb_agcount);
1067                ASSERT(0);
1068                return XFS_ERROR(EINVAL);
1069        }
1070        agino = XFS_INO_TO_AGINO(mp, inode);
1071        if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  {
1072                xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1073                        __func__, (unsigned long long)inode,
1074                        (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1075                ASSERT(0);
1076                return XFS_ERROR(EINVAL);
1077        }
1078        agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1079        if (agbno >= mp->m_sb.sb_agblocks)  {
1080                xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1081                        __func__, agbno, mp->m_sb.sb_agblocks);
1082                ASSERT(0);
1083                return XFS_ERROR(EINVAL);
1084        }
1085        /*
1086         * Get the allocation group header.
1087         */
1088        error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1089        if (error) {
1090                xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1091                        __func__, error);
1092                return error;
1093        }
1094        agi = XFS_BUF_TO_AGI(agbp);
1095        ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1096        ASSERT(agbno < be32_to_cpu(agi->agi_length));
1097        /*
1098         * Initialize the cursor.
1099         */
1100        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1101
1102        error = xfs_check_agi_freecount(cur, agi);
1103        if (error)
1104                goto error0;
1105
1106        /*
1107         * Look for the entry describing this inode.
1108         */
1109        if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
1110                xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
1111                        __func__, error);
1112                goto error0;
1113        }
1114        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1115        error = xfs_inobt_get_rec(cur, &rec, &i);
1116        if (error) {
1117                xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
1118                        __func__, error);
1119                goto error0;
1120        }
1121        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1122        /*
1123         * Get the offset in the inode chunk.
1124         */
1125        off = agino - rec.ir_startino;
1126        ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
1127        ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
1128        /*
1129         * Mark the inode free & increment the count.
1130         */
1131        rec.ir_free |= XFS_INOBT_MASK(off);
1132        rec.ir_freecount++;
1133
1134        /*
1135         * When an inode cluster is free, it becomes eligible for removal
1136         */
1137        if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
1138            (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
1139
1140                *delete = 1;
1141                *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
1142
1143                /*
1144                 * Remove the inode cluster from the AGI B+Tree, adjust the
1145                 * AGI and Superblock inode counts, and mark the disk space
1146                 * to be freed when the transaction is committed.
1147                 */
1148                ilen = XFS_IALLOC_INODES(mp);
1149                be32_add_cpu(&agi->agi_count, -ilen);
1150                be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1151                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1152                pag = xfs_perag_get(mp, agno);
1153                pag->pagi_freecount -= ilen - 1;
1154                xfs_perag_put(pag);
1155                xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1156                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1157
1158                if ((error = xfs_btree_delete(cur, &i))) {
1159                        xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
1160                                __func__, error);
1161                        goto error0;
1162                }
1163
1164                xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
1165                                agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
1166                                XFS_IALLOC_BLOCKS(mp), flist, mp);
1167        } else {
1168                *delete = 0;
1169
1170                error = xfs_inobt_update(cur, &rec);
1171                if (error) {
1172                        xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
1173                                __func__, error);
1174                        goto error0;
1175                }
1176
1177                /* 
1178                 * Change the inode free counts and log the ag/sb changes.
1179                 */
1180                be32_add_cpu(&agi->agi_freecount, 1);
1181                xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1182                pag = xfs_perag_get(mp, agno);
1183                pag->pagi_freecount++;
1184                xfs_perag_put(pag);
1185                xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1186        }
1187
1188        error = xfs_check_agi_freecount(cur, agi);
1189        if (error)
1190                goto error0;
1191
1192        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1193        return 0;
1194
1195error0:
1196        xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1197        return error;
1198}
1199
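/*
 * Descriptive note (added): find the on-disk location of an inode by looking
 * up its chunk record in the inode btree.  Used when the chunk start cannot
 * be derived by simple arithmetic: untrusted inode numbers, or filesystems
 * without inode chunk alignment.
 */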
1200STATIC int
1201xfs_imap_lookup(
1202        struct xfs_mount        *mp,
1203        struct xfs_trans        *tp,
1204        xfs_agnumber_t          agno,
1205        xfs_agino_t             agino,
1206        xfs_agblock_t           agbno,
1207        xfs_agblock_t           *chunk_agbno,
1208        xfs_agblock_t           *offset_agbno,
1209        int                     flags)
1210{
1211        struct xfs_inobt_rec_incore rec;
1212        struct xfs_btree_cur    *cur;
1213        struct xfs_buf          *agbp;
1214        int                     error;
1215        int                     i;
1216
1217        error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1218        if (error) {
1219                xfs_alert(mp,
1220                        "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
1221                        __func__, error, agno);
1222                return error;
1223        }
1224
1225        /*
1226         * Lookup the inode record for the given agino. If the record cannot be
1227         * found, then it's an invalid inode number and we should abort. Once
1228         * we have a record, we need to ensure it contains the inode number
1229         * we are looking up.
1230         */
1231        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1232        error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1233        if (!error) {
1234                if (i)
1235                        error = xfs_inobt_get_rec(cur, &rec, &i);
1236                if (!error && i == 0)
1237                        error = EINVAL;
1238        }
1239
1240        xfs_trans_brelse(tp, agbp);
1241        xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1242        if (error)
1243                return error;
1244
1245        /* check that the returned record contains the required inode */
1246        if (rec.ir_startino > agino ||
1247            rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
1248                return EINVAL;
1249
1250        /* for untrusted inodes check it is allocated first */
1251        if ((flags & XFS_IGET_UNTRUSTED) &&
1252            (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1253                return EINVAL;
1254
1255        *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1256        *offset_agbno = agbno - *chunk_agbno;
1257        return 0;
1258}
1259
1260/*
1261 * Return the location of the inode in imap, for mapping it into a buffer.
1262 */
1263int
1264xfs_imap(
1265        xfs_mount_t      *mp,   /* file system mount structure */
1266        xfs_trans_t      *tp,   /* transaction pointer */
1267        xfs_ino_t       ino,    /* inode to locate */
1268        struct xfs_imap *imap,  /* location map structure */
1269        uint            flags)  /* flags for inode btree lookup */
1270{
1271        xfs_agblock_t   agbno;  /* block number of inode in the alloc group */
1272        xfs_agino_t     agino;  /* inode number within alloc group */
1273        xfs_agnumber_t  agno;   /* allocation group number */
1274        int             blks_per_cluster; /* num blocks per inode cluster */
1275        xfs_agblock_t   chunk_agbno;    /* first block in inode chunk */
1276        xfs_agblock_t   cluster_agbno;  /* first block in inode cluster */
1277        int             error;  /* error code */
1278        int             offset; /* index of inode in its buffer */
1279        int             offset_agbno;   /* blks from chunk start to inode */
1280
1281        ASSERT(ino != NULLFSINO);
1282
1283        /*
1284         * Split up the inode number into its parts.
1285         */
1286        agno = XFS_INO_TO_AGNO(mp, ino);
1287        agino = XFS_INO_TO_AGINO(mp, ino);
1288        agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1289        if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1290            ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1291#ifdef DEBUG
1292                /*
1293                 * Don't output diagnostic information for untrusted inodes
1294                 * as they can be invalid without implying corruption.
1295                 */
1296                if (flags & XFS_IGET_UNTRUSTED)
1297                        return XFS_ERROR(EINVAL);
1298                if (agno >= mp->m_sb.sb_agcount) {
1299                        xfs_alert(mp,
1300                                "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
1301                                __func__, agno, mp->m_sb.sb_agcount);
1302                }
1303                if (agbno >= mp->m_sb.sb_agblocks) {
1304                        xfs_alert(mp,
1305                "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
1306                                __func__, (unsigned long long)agbno,
1307                                (unsigned long)mp->m_sb.sb_agblocks);
1308                }
1309                if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1310                        xfs_alert(mp,
1311                "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
1312                                __func__, ino,
1313                                XFS_AGINO_TO_INO(mp, agno, agino));
1314                }
1315                xfs_stack_trace();
1316#endif /* DEBUG */
1317                return XFS_ERROR(EINVAL);
1318        }
1319
1320        blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1321
1322        /*
1323         * For bulkstat and handle lookups, we have an untrusted inode number
1324         * that we have to verify is valid. We cannot do this just by reading
1325         * the inode buffer as it may have been unlinked and removed leaving
1326         * inodes in stale state on disk. Hence we have to do a btree lookup
1327         * in all cases where an untrusted inode number is passed.
1328         */
1329        if (flags & XFS_IGET_UNTRUSTED) {
1330                error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1331                                        &chunk_agbno, &offset_agbno, flags);
1332                if (error)
1333                        return error;
1334                goto out_map;
1335        }
1336
1337        /*
1338         * If the inode cluster size is the same as the blocksize or
 1339         * smaller, we get to the buffer by simple arithmetic.
1340         */
1341        if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) {
1342                offset = XFS_INO_TO_OFFSET(mp, ino);
1343                ASSERT(offset < mp->m_sb.sb_inopblock);
1344
1345                imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
1346                imap->im_len = XFS_FSB_TO_BB(mp, 1);
1347                imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1348                return 0;
1349        }
1350
1351        /*
1352         * If the inode chunks are aligned then use simple maths to
1353         * find the location. Otherwise we have to do a btree
1354         * lookup to find the location.
1355         */
1356        if (mp->m_inoalign_mask) {
1357                offset_agbno = agbno & mp->m_inoalign_mask;
1358                chunk_agbno = agbno - offset_agbno;
1359        } else {
1360                error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1361                                        &chunk_agbno, &offset_agbno, flags);
1362                if (error)
1363                        return error;
1364        }
1365
1366out_map:
1367        ASSERT(agbno >= chunk_agbno);
1368        cluster_agbno = chunk_agbno +
1369                ((offset_agbno / blks_per_cluster) * blks_per_cluster);
1370        offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1371                XFS_INO_TO_OFFSET(mp, ino);
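        /*
         * Worked example of the cluster arithmetic above, with
         * illustrative numbers: an 8192-byte inode cluster on 4096-byte
         * blocks gives blks_per_cluster = 2.  If agbno sits 3 blocks into
         * its chunk (offset_agbno = 3), the cluster starts at
         * chunk_agbno + (3 / 2) * 2 = chunk_agbno + 2, and with 16 inodes
         * per block the offset computed above selects one of inode slots
         * 16..31 within the two-block cluster buffer; im_boffset below
         * then converts that slot to a byte offset.
         */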
1372
1373        imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
1374        imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1375        imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1376
1377        /*
1378         * If the inode number maps to a block outside the bounds
1379         * of the file system then return an error rather than calling
1380         * read_buf and panicking when we get an error from the
1381         * driver.
1382         */
1383        if ((imap->im_blkno + imap->im_len) >
1384            XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1385                xfs_alert(mp,
1386        "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
1387                        __func__, (unsigned long long) imap->im_blkno,
1388                        (unsigned long long) imap->im_len,
1389                        XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1390                return XFS_ERROR(EINVAL);
1391        }
1392        return 0;
1393}
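/*
 * A minimal sketch of how a caller might consume the mapping filled in by
 * xfs_imap().  The local names and the exact read path are illustrative
 * assumptions, not taken from this file:
 *
 *      struct xfs_imap         imap;
 *      struct xfs_buf          *bp;
 *      struct xfs_dinode       *dip;
 *      int                     error;
 *
 *      error = xfs_imap(mp, tp, ino, &imap, 0);
 *      if (error)
 *              return error;
 *      error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
 *                                 imap.im_len, 0, &bp);
 *      if (error)
 *              return error;
 *      dip = (struct xfs_dinode *)xfs_buf_offset(bp, imap.im_boffset);
 */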
1394
1395/*
1396 * Compute and fill in value of m_in_maxlevels.
1397 */
1398void
1399xfs_ialloc_compute_maxlevels(
1400        xfs_mount_t     *mp)            /* file system mount structure */
1401{
1402        int             level;
1403        uint            maxblocks;
1404        uint            maxleafents;
1405        int             minleafrecs;
1406        int             minnoderecs;
1407
1408        maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
1409                XFS_INODES_PER_CHUNK_LOG;
1410        minleafrecs = mp->m_alloc_mnr[0];
1411        minnoderecs = mp->m_alloc_mnr[1];
1412        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1413        for (level = 1; maxblocks > 1; level++)
1414                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1415        mp->m_in_maxlevels = level;
1416}
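/*
 * To make the loop above concrete, a worked example with illustrative
 * numbers (real minrecs values depend on the filesystem block size): with
 * 32 agino bits and 64 inodes per chunk, maxleafents = 2^32 >> 6 = 2^26
 * records.  Assuming minleafrecs = minnoderecs = 125, the leaf level needs
 * ceil(2^26 / 125) = 536871 blocks, the next level 4295, then 35, then 1,
 * so the loop terminates with m_in_maxlevels = 4.
 */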
1417
1418/*
1419 * Log specified fields for the ag hdr (inode section)
1420 */
1421void
1422xfs_ialloc_log_agi(
1423        xfs_trans_t     *tp,            /* transaction pointer */
1424        xfs_buf_t       *bp,            /* allocation group header buffer */
1425        int             fields)         /* bitmask of fields to log */
1426{
1427        int                     first;          /* first byte number */
1428        int                     last;           /* last byte number */
1429        static const short      offsets[] = {   /* field starting offsets */
1430                                        /* keep in sync with bit definitions */
1431                offsetof(xfs_agi_t, agi_magicnum),
1432                offsetof(xfs_agi_t, agi_versionnum),
1433                offsetof(xfs_agi_t, agi_seqno),
1434                offsetof(xfs_agi_t, agi_length),
1435                offsetof(xfs_agi_t, agi_count),
1436                offsetof(xfs_agi_t, agi_root),
1437                offsetof(xfs_agi_t, agi_level),
1438                offsetof(xfs_agi_t, agi_freecount),
1439                offsetof(xfs_agi_t, agi_newino),
1440                offsetof(xfs_agi_t, agi_dirino),
1441                offsetof(xfs_agi_t, agi_unlinked),
1442                sizeof(xfs_agi_t)
1443        };
1444#ifdef DEBUG
1445        xfs_agi_t               *agi;   /* allocation group header */
1446
1447        agi = XFS_BUF_TO_AGI(bp);
1448        ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1449#endif
1450        /*
1451         * Compute byte offsets for the first and last fields.
1452         */
1453        xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
1454        /*
1455         * Log the allocation group inode header buffer.
1456         */
1457        xfs_trans_log_buf(tp, bp, first, last);
1458}
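/*
 * Callers pass a mask of XFS_AGI_* bits that must stay in sync with the
 * offsets[] table above; xfs_btree_offsets() turns the mask into the
 * first/last byte range that actually gets logged.  An illustrative call
 * site (not taken from this file) logging just the counters and the
 * new-inode hint after allocating a chunk might look like:
 *
 *      xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT |
 *                                   XFS_AGI_NEWINO);
 */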
1459
1460#ifdef DEBUG
1461STATIC void
1462xfs_check_agi_unlinked(
1463        struct xfs_agi          *agi)
1464{
1465        int                     i;
1466
1467        for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
1468                ASSERT(agi->agi_unlinked[i]);
1469}
1470#else
1471#define xfs_check_agi_unlinked(agi)
1472#endif
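/*
 * The DEBUG-only check above relies on empty unlinked buckets being stored
 * as NULLAGINO (all one bits) rather than zero, so a zero entry can only
 * mean the bucket was never initialised or has been corrupted.
 */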
1473
1474/*
1475 * Read in the allocation group header (inode allocation section)
1476 */
1477int
1478xfs_read_agi(
1479        struct xfs_mount        *mp,    /* file system mount structure */
1480        struct xfs_trans        *tp,    /* transaction pointer */
1481        xfs_agnumber_t          agno,   /* allocation group number */
1482        struct xfs_buf          **bpp)  /* allocation group hdr buf */
1483{
1484        struct xfs_agi          *agi;   /* allocation group header */
1485        int                     agi_ok; /* agi is consistent */
1486        int                     error;
1487
1488        ASSERT(agno != NULLAGNUMBER);
1489
1490        error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
1491                        XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
1492                        XFS_FSS_TO_BB(mp, 1), 0, bpp);
1493        if (error)
1494                return error;
1495
1496        ASSERT(!xfs_buf_geterror(*bpp));
1497        agi = XFS_BUF_TO_AGI(*bpp);
1498
1499        /*
1500         * Validate the magic number, version and sequence number of the agi block.
1501         */
1502        agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
1503                XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1504                be32_to_cpu(agi->agi_seqno) == agno;
1505        if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1506                        XFS_RANDOM_IALLOC_READ_AGI))) {
1507                XFS_CORRUPTION_ERROR("xfs_read_agi", XFS_ERRLEVEL_LOW,
1508                                     mp, agi);
1509                xfs_trans_brelse(tp, *bpp);
1510                return XFS_ERROR(EFSCORRUPTED);
1511        }
1512
1513        xfs_buf_set_ref(*bpp, XFS_AGI_REF);
1514
1515        xfs_check_agi_unlinked(agi);
1516        return 0;
1517}
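/*
 * A note on the corruption check above: XFS_TEST_ERROR() always evaluates
 * the !agi_ok expression, and in DEBUG builds it can also inject a failure
 * for the XFS_ERRTAG_IALLOC_READ_AGI tag, which lets error injection
 * exercise the EFSCORRUPTED path without a genuinely corrupt AGI on disk.
 */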
1518
1519int
1520xfs_ialloc_read_agi(
1521        struct xfs_mount        *mp,    /* file system mount structure */
1522        struct xfs_trans        *tp,    /* transaction pointer */
1523        xfs_agnumber_t          agno,   /* allocation group number */
1524        struct xfs_buf          **bpp)  /* allocation group hdr buf */
1525{
1526        struct xfs_agi          *agi;   /* allocation group header */
1527        struct xfs_perag        *pag;   /* per allocation group data */
1528        int                     error;
1529
1530        error = xfs_read_agi(mp, tp, agno, bpp);
1531        if (error)
1532                return error;
1533
1534        agi = XFS_BUF_TO_AGI(*bpp);
1535        pag = xfs_perag_get(mp, agno);
1536        if (!pag->pagi_init) {
1537                pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
1538                pag->pagi_count = be32_to_cpu(agi->agi_count);
1539                pag->pagi_init = 1;
1540        }
1541
1542        /*
1543         * It's possible for these to be out of sync if
1544         * we are in the middle of a forced shutdown.
1545         */
1546        ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1547                XFS_FORCED_SHUTDOWN(mp));
1548        xfs_perag_put(pag);
1549        return 0;
1550}
1551
1552/*
1553 * Read in the agi to initialise the per-ag data in the mount structure
1554 */
1555int
1556xfs_ialloc_pagi_init(
1557        xfs_mount_t     *mp,            /* file system mount structure */
1558        xfs_trans_t     *tp,            /* transaction pointer */
1559        xfs_agnumber_t  agno)           /* allocation group number */
1560{
1561        xfs_buf_t       *bp = NULL;
1562        int             error;
1563
1564        error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1565        if (error)
1566                return error;
1567        if (bp)
1568                xfs_trans_brelse(tp, bp);
1569        return 0;
1570}
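/*
 * A minimal usage sketch for xfs_ialloc_pagi_init() (caller-side names are
 * illustrative assumptions): code that only needs the cached per-ag inode
 * counters can populate them and then read them without holding the AGI
 * buffer itself:
 *
 *      error = xfs_ialloc_pagi_init(mp, tp, agno);
 *      if (error)
 *              return error;
 *      pag = xfs_perag_get(mp, agno);
 *      free_inodes = pag->pagi_freecount;
 *      xfs_perag_put(pag);
 */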
1571