linux/fs/xfs/xfs_alloc.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
   3 * All Rights Reserved.
   4 *
   5 * This program is free software; you can redistribute it and/or
   6 * modify it under the terms of the GNU General Public License as
   7 * published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it would be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write the Free Software Foundation,
  16 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17 */
  18#include "xfs.h"
  19#include "xfs_fs.h"
  20#include "xfs_types.h"
  21#include "xfs_bit.h"
  22#include "xfs_log.h"
  23#include "xfs_trans.h"
  24#include "xfs_sb.h"
  25#include "xfs_ag.h"
  26#include "xfs_mount.h"
  27#include "xfs_bmap_btree.h"
  28#include "xfs_alloc_btree.h"
  29#include "xfs_ialloc_btree.h"
  30#include "xfs_dinode.h"
  31#include "xfs_inode.h"
  32#include "xfs_btree.h"
  33#include "xfs_alloc.h"
  34#include "xfs_extent_busy.h"
  35#include "xfs_error.h"
  36#include "xfs_trace.h"
  37
  38struct workqueue_struct *xfs_alloc_wq;
  39
  40#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
  41
  42#define XFSA_FIXUP_BNO_OK       1
  43#define XFSA_FIXUP_CNT_OK       2
  44
  45STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
  46STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
  47STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
  48STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
  49                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
  50
  51/*
  52 * Lookup the record equal to [bno, len] in the btree given by cur.
  53 */
  54STATIC int                              /* error */
  55xfs_alloc_lookup_eq(
  56        struct xfs_btree_cur    *cur,   /* btree cursor */
  57        xfs_agblock_t           bno,    /* starting block of extent */
  58        xfs_extlen_t            len,    /* length of extent */
  59        int                     *stat)  /* success/failure */
  60{
  61        cur->bc_rec.a.ar_startblock = bno;
  62        cur->bc_rec.a.ar_blockcount = len;
  63        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
  64}
  65
  66/*
  67 * Lookup the first record greater than or equal to [bno, len]
  68 * in the btree given by cur.
  69 */
  70int                             /* error */
  71xfs_alloc_lookup_ge(
  72        struct xfs_btree_cur    *cur,   /* btree cursor */
  73        xfs_agblock_t           bno,    /* starting block of extent */
  74        xfs_extlen_t            len,    /* length of extent */
  75        int                     *stat)  /* success/failure */
  76{
  77        cur->bc_rec.a.ar_startblock = bno;
  78        cur->bc_rec.a.ar_blockcount = len;
  79        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
  80}
  81
  82/*
  83 * Lookup the first record less than or equal to [bno, len]
  84 * in the btree given by cur.
  85 */
  86int                                     /* error */
  87xfs_alloc_lookup_le(
  88        struct xfs_btree_cur    *cur,   /* btree cursor */
  89        xfs_agblock_t           bno,    /* starting block of extent */
  90        xfs_extlen_t            len,    /* length of extent */
  91        int                     *stat)  /* success/failure */
  92{
  93        cur->bc_rec.a.ar_startblock = bno;
  94        cur->bc_rec.a.ar_blockcount = len;
  95        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
  96}
  97
  98/*
  99 * Update the record referred to by cur to the value given
 100 * by [bno, len].
 101 * This either works (return 0) or gets an EFSCORRUPTED error.
 102 */
 103STATIC int                              /* error */
 104xfs_alloc_update(
 105        struct xfs_btree_cur    *cur,   /* btree cursor */
 106        xfs_agblock_t           bno,    /* starting block of extent */
 107        xfs_extlen_t            len)    /* length of extent */
 108{
 109        union xfs_btree_rec     rec;
 110
 111        rec.alloc.ar_startblock = cpu_to_be32(bno);
 112        rec.alloc.ar_blockcount = cpu_to_be32(len);
 113        return xfs_btree_update(cur, &rec);
 114}
 115
 116/*
 117 * Get the data from the pointed-to record.
 118 */
 119int                                     /* error */
 120xfs_alloc_get_rec(
 121        struct xfs_btree_cur    *cur,   /* btree cursor */
 122        xfs_agblock_t           *bno,   /* output: starting block of extent */
 123        xfs_extlen_t            *len,   /* output: length of extent */
 124        int                     *stat)  /* output: success/failure */
 125{
 126        union xfs_btree_rec     *rec;
 127        int                     error;
 128
 129        error = xfs_btree_get_rec(cur, &rec, stat);
 130        if (!error && *stat == 1) {
 131                *bno = be32_to_cpu(rec->alloc.ar_startblock);
 132                *len = be32_to_cpu(rec->alloc.ar_blockcount);
 133        }
 134        return error;
 135}
 136
 137/*
 138 * Compute aligned version of the found extent.
 139 * Takes alignment and min length into account.
 140 */
 141STATIC void
 142xfs_alloc_compute_aligned(
 143        xfs_alloc_arg_t *args,          /* allocation argument structure */
 144        xfs_agblock_t   foundbno,       /* starting block in found extent */
 145        xfs_extlen_t    foundlen,       /* length in found extent */
 146        xfs_agblock_t   *resbno,        /* result block number */
 147        xfs_extlen_t    *reslen)        /* result length */
 148{
 149        xfs_agblock_t   bno;
 150        xfs_extlen_t    len;
 151
 152        /* Trim busy sections out of found extent */
 153        xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
 154
 155        if (args->alignment > 1 && len >= args->minlen) {
 156                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
 157                xfs_extlen_t    diff = aligned_bno - bno;
 158
 159                *resbno = aligned_bno;
 160                *reslen = diff >= len ? 0 : len - diff;
 161        } else {
 162                *resbno = bno;
 163                *reslen = len;
 164        }
 165}
 166
 167/*
 168 * Compute best start block and diff for "near" allocations.
 169 * freelen >= wantlen already checked by caller.
 170 */
 171STATIC xfs_extlen_t                     /* difference value (absolute) */
 172xfs_alloc_compute_diff(
 173        xfs_agblock_t   wantbno,        /* target starting block */
 174        xfs_extlen_t    wantlen,        /* target length */
 175        xfs_extlen_t    alignment,      /* target alignment */
 176        xfs_agblock_t   freebno,        /* freespace's starting block */
 177        xfs_extlen_t    freelen,        /* freespace's length */
 178        xfs_agblock_t   *newbnop)       /* result: best start block from free */
 179{
 180        xfs_agblock_t   freeend;        /* end of freespace extent */
 181        xfs_agblock_t   newbno1;        /* return block number */
 182        xfs_agblock_t   newbno2;        /* other new block number */
 183        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
 184        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
 185        xfs_agblock_t   wantend;        /* end of target extent */
 186
 187        ASSERT(freelen >= wantlen);
 188        freeend = freebno + freelen;
 189        wantend = wantbno + wantlen;
 190        if (freebno >= wantbno) {
 191                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
 192                        newbno1 = NULLAGBLOCK;
 193        } else if (freeend >= wantend && alignment > 1) {
 194                newbno1 = roundup(wantbno, alignment);
 195                newbno2 = newbno1 - alignment;
 196                if (newbno1 >= freeend)
 197                        newbno1 = NULLAGBLOCK;
 198                else
 199                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
 200                if (newbno2 < freebno)
 201                        newbno2 = NULLAGBLOCK;
 202                else
 203                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
 204                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
 205                        if (newlen1 < newlen2 ||
 206                            (newlen1 == newlen2 &&
 207                             XFS_ABSDIFF(newbno1, wantbno) >
 208                             XFS_ABSDIFF(newbno2, wantbno)))
 209                                newbno1 = newbno2;
 210                } else if (newbno2 != NULLAGBLOCK)
 211                        newbno1 = newbno2;
 212        } else if (freeend >= wantend) {
 213                newbno1 = wantbno;
 214        } else if (alignment > 1) {
 215                newbno1 = roundup(freeend - wantlen, alignment);
 216                if (newbno1 > freeend - wantlen &&
 217                    newbno1 - alignment >= freebno)
 218                        newbno1 -= alignment;
 219                else if (newbno1 >= freeend)
 220                        newbno1 = NULLAGBLOCK;
 221        } else
 222                newbno1 = freeend - wantlen;
 223        *newbnop = newbno1;
 224        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
 225}
 226
 227/*
 228 * Fix up the length, based on mod and prod.
 229 * len should be k * prod + mod for some k.
 230 * If len is too small it is returned unchanged.
 231 * If len hits maxlen it is left alone.
 232 */
 233STATIC void
 234xfs_alloc_fix_len(
 235        xfs_alloc_arg_t *args)          /* allocation argument structure */
 236{
 237        xfs_extlen_t    k;
 238        xfs_extlen_t    rlen;
 239
 240        ASSERT(args->mod < args->prod);
 241        rlen = args->len;
 242        ASSERT(rlen >= args->minlen);
 243        ASSERT(rlen <= args->maxlen);
 244        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
 245            (args->mod == 0 && rlen < args->prod))
 246                return;
 247        k = rlen % args->prod;
 248        if (k == args->mod)
 249                return;
 250        if (k > args->mod) {
 251                if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
 252                        return;
 253        } else {
 254                if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
 255                    (int)args->minlen)
 256                        return;
 257        }
 258        ASSERT(rlen >= args->minlen);
 259        ASSERT(rlen <= args->maxlen);
 260        args->len = rlen;
 261}
 262
 263/*
 264 * Fix up length if there is too little space left in the a.g.
 265 * Return 1 if ok, 0 if too little, should give up.
 266 */
 267STATIC int
 268xfs_alloc_fix_minleft(
 269        xfs_alloc_arg_t *args)          /* allocation argument structure */
 270{
 271        xfs_agf_t       *agf;           /* a.g. freelist header */
 272        int             diff;           /* free space difference */
 273
 274        if (args->minleft == 0)
 275                return 1;
 276        agf = XFS_BUF_TO_AGF(args->agbp);
 277        diff = be32_to_cpu(agf->agf_freeblks)
 278                - args->len - args->minleft;
 279        if (diff >= 0)
 280                return 1;
 281        args->len += diff;              /* shrink the allocated space */
 282        if (args->len >= args->minlen)
 283                return 1;
 284        args->agbno = NULLAGBLOCK;
 285        return 0;
 286}
 287
 288/*
 289 * Update the two btrees, logically removing from freespace the extent
 290 * starting at rbno, rlen blocks.  The extent is contained within the
 291 * actual (current) free extent fbno for flen blocks.
 292 * Flags are passed in indicating whether the cursors are set to the
 293 * relevant records.
 294 */
 295STATIC int                              /* error code */
 296xfs_alloc_fixup_trees(
 297        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
 298        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
 299        xfs_agblock_t   fbno,           /* starting block of free extent */
 300        xfs_extlen_t    flen,           /* length of free extent */
 301        xfs_agblock_t   rbno,           /* starting block of returned extent */
 302        xfs_extlen_t    rlen,           /* length of returned extent */
 303        int             flags)          /* flags, XFSA_FIXUP_... */
 304{
 305        int             error;          /* error code */
 306        int             i;              /* operation results */
 307        xfs_agblock_t   nfbno1;         /* first new free startblock */
 308        xfs_agblock_t   nfbno2;         /* second new free startblock */
 309        xfs_extlen_t    nflen1=0;       /* first new free length */
 310        xfs_extlen_t    nflen2=0;       /* second new free length */
 311
 312        /*
 313         * Look up the record in the by-size tree if necessary.
 314         */
 315        if (flags & XFSA_FIXUP_CNT_OK) {
 316#ifdef DEBUG
 317                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
 318                        return error;
 319                XFS_WANT_CORRUPTED_RETURN(
 320                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 321#endif
 322        } else {
 323                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
 324                        return error;
 325                XFS_WANT_CORRUPTED_RETURN(i == 1);
 326        }
 327        /*
 328         * Look up the record in the by-block tree if necessary.
 329         */
 330        if (flags & XFSA_FIXUP_BNO_OK) {
 331#ifdef DEBUG
 332                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
 333                        return error;
 334                XFS_WANT_CORRUPTED_RETURN(
 335                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 336#endif
 337        } else {
 338                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
 339                        return error;
 340                XFS_WANT_CORRUPTED_RETURN(i == 1);
 341        }
 342
 343#ifdef DEBUG
 344        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
 345                struct xfs_btree_block  *bnoblock;
 346                struct xfs_btree_block  *cntblock;
 347
 348                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
 349                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 350
 351                XFS_WANT_CORRUPTED_RETURN(
 352                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
 353        }
 354#endif
 355
 356        /*
 357         * Deal with all four cases: the allocated record is contained
 358         * within the freespace record, so we can have new freespace
 359         * at either (or both) end, or no freespace remaining.
 360         */
 361        if (rbno == fbno && rlen == flen)
 362                nfbno1 = nfbno2 = NULLAGBLOCK;
 363        else if (rbno == fbno) {
 364                nfbno1 = rbno + rlen;
 365                nflen1 = flen - rlen;
 366                nfbno2 = NULLAGBLOCK;
 367        } else if (rbno + rlen == fbno + flen) {
 368                nfbno1 = fbno;
 369                nflen1 = flen - rlen;
 370                nfbno2 = NULLAGBLOCK;
 371        } else {
 372                nfbno1 = fbno;
 373                nflen1 = rbno - fbno;
 374                nfbno2 = rbno + rlen;
 375                nflen2 = (fbno + flen) - nfbno2;
 376        }
 377        /*
 378         * Delete the entry from the by-size btree.
 379         */
 380        if ((error = xfs_btree_delete(cnt_cur, &i)))
 381                return error;
 382        XFS_WANT_CORRUPTED_RETURN(i == 1);
 383        /*
 384         * Add new by-size btree entry(s).
 385         */
 386        if (nfbno1 != NULLAGBLOCK) {
 387                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 388                        return error;
 389                XFS_WANT_CORRUPTED_RETURN(i == 0);
 390                if ((error = xfs_btree_insert(cnt_cur, &i)))
 391                        return error;
 392                XFS_WANT_CORRUPTED_RETURN(i == 1);
 393        }
 394        if (nfbno2 != NULLAGBLOCK) {
 395                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 396                        return error;
 397                XFS_WANT_CORRUPTED_RETURN(i == 0);
 398                if ((error = xfs_btree_insert(cnt_cur, &i)))
 399                        return error;
 400                XFS_WANT_CORRUPTED_RETURN(i == 1);
 401        }
 402        /*
 403         * Fix up the by-block btree entry(s).
 404         */
 405        if (nfbno1 == NULLAGBLOCK) {
 406                /*
 407                 * No remaining freespace, just delete the by-block tree entry.
 408                 */
 409                if ((error = xfs_btree_delete(bno_cur, &i)))
 410                        return error;
 411                XFS_WANT_CORRUPTED_RETURN(i == 1);
 412        } else {
 413                /*
 414                 * Update the by-block entry to start later|be shorter.
 415                 */
 416                if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
 417                        return error;
 418        }
 419        if (nfbno2 != NULLAGBLOCK) {
 420                /*
 421                 * 2 resulting free entries, need to add one.
 422                 */
 423                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
 424                        return error;
 425                XFS_WANT_CORRUPTED_RETURN(i == 0);
 426                if ((error = xfs_btree_insert(bno_cur, &i)))
 427                        return error;
 428                XFS_WANT_CORRUPTED_RETURN(i == 1);
 429        }
 430        return 0;
 431}
 432
 433static void
 434xfs_agfl_verify(
 435        struct xfs_buf  *bp)
 436{
 437#ifdef WHEN_CRCS_COME_ALONG
 438        /*
 439         * we cannot actually do any verification of the AGFL because mkfs does
 440         * not initialise the AGFL to zero or NULL. Hence the only valid part of
 441         * the AGFL is what the AGF says is active. We can't get to the AGF, so
 442         * we can't verify just those entries are valid.
 443         *
 444         * This problem goes away when the CRC format change comes along as that
 445         * requires the AGFL to be initialised by mkfs. At that point, we can
 446         * verify the blocks in the agfl -active or not- lie within the bounds
 447         * of the AG. Until then, just leave this check ifdef'd out.
 448         */
 449        struct xfs_mount *mp = bp->b_target->bt_mount;
 450        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
 451        int             agfl_ok = 1;
 452
 453        int             i;
 454
 455        for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
 456                if (be32_to_cpu(agfl->agfl_bno[i]) == NULLAGBLOCK ||
 457                    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
 458                        agfl_ok = 0;
 459        }
 460
 461        if (!agfl_ok) {
 462                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agfl);
 463                xfs_buf_ioerror(bp, EFSCORRUPTED);
 464        }
 465#endif
 466}
 467
 468static void
 469xfs_agfl_write_verify(
 470        struct xfs_buf  *bp)
 471{
 472        xfs_agfl_verify(bp);
 473}
 474
 475static void
 476xfs_agfl_read_verify(
 477        struct xfs_buf  *bp)
 478{
 479        xfs_agfl_verify(bp);
 480}
 481
 482const struct xfs_buf_ops xfs_agfl_buf_ops = {
 483        .verify_read = xfs_agfl_read_verify,
 484        .verify_write = xfs_agfl_write_verify,
 485};
 486
 487/*
 488 * Read in the allocation group free block array.
 489 */
 490STATIC int                              /* error */
 491xfs_alloc_read_agfl(
 492        xfs_mount_t     *mp,            /* mount point structure */
 493        xfs_trans_t     *tp,            /* transaction pointer */
 494        xfs_agnumber_t  agno,           /* allocation group number */
 495        xfs_buf_t       **bpp)          /* buffer for the ag free block array */
 496{
 497        xfs_buf_t       *bp;            /* return value */
 498        int             error;
 499
 500        ASSERT(agno != NULLAGNUMBER);
 501        error = xfs_trans_read_buf(
 502                        mp, tp, mp->m_ddev_targp,
 503                        XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
 504                        XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
 505        if (error)
 506                return error;
 507        ASSERT(!xfs_buf_geterror(bp));
 508        xfs_buf_set_ref(bp, XFS_AGFL_REF);
 509        *bpp = bp;
 510        return 0;
 511}
 512
 513STATIC int
 514xfs_alloc_update_counters(
 515        struct xfs_trans        *tp,
 516        struct xfs_perag        *pag,
 517        struct xfs_buf          *agbp,
 518        long                    len)
 519{
 520        struct xfs_agf          *agf = XFS_BUF_TO_AGF(agbp);
 521
 522        pag->pagf_freeblks += len;
 523        be32_add_cpu(&agf->agf_freeblks, len);
 524
 525        xfs_trans_agblocks_delta(tp, len);
 526        if (unlikely(be32_to_cpu(agf->agf_freeblks) >
 527                     be32_to_cpu(agf->agf_length)))
 528                return EFSCORRUPTED;
 529
 530        xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
 531        return 0;
 532}
 533
 534/*
 535 * Allocation group level functions.
 536 */
 537
 538/*
 539 * Allocate a variable extent in the allocation group agno.
 540 * Type and bno are used to determine where in the allocation group the
 541 * extent will start.
 542 * Extent's length (returned in *len) will be between minlen and maxlen,
 543 * and of the form k * prod + mod unless there's nothing that large.
 544 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 545 */
 546STATIC int                      /* error */
 547xfs_alloc_ag_vextent(
 548        xfs_alloc_arg_t *args)  /* argument structure for allocation */
 549{
 550        int             error=0;
 551
 552        ASSERT(args->minlen > 0);
 553        ASSERT(args->maxlen > 0);
 554        ASSERT(args->minlen <= args->maxlen);
 555        ASSERT(args->mod < args->prod);
 556        ASSERT(args->alignment > 0);
 557        /*
 558         * Branch to correct routine based on the type.
 559         */
 560        args->wasfromfl = 0;
 561        switch (args->type) {
 562        case XFS_ALLOCTYPE_THIS_AG:
 563                error = xfs_alloc_ag_vextent_size(args);
 564                break;
 565        case XFS_ALLOCTYPE_NEAR_BNO:
 566                error = xfs_alloc_ag_vextent_near(args);
 567                break;
 568        case XFS_ALLOCTYPE_THIS_BNO:
 569                error = xfs_alloc_ag_vextent_exact(args);
 570                break;
 571        default:
 572                ASSERT(0);
 573                /* NOTREACHED */
 574        }
 575
 576        if (error || args->agbno == NULLAGBLOCK)
 577                return error;
 578
 579        ASSERT(args->len >= args->minlen);
 580        ASSERT(args->len <= args->maxlen);
 581        ASSERT(!args->wasfromfl || !args->isfl);
 582        ASSERT(args->agbno % args->alignment == 0);
 583
 584        if (!args->wasfromfl) {
 585                error = xfs_alloc_update_counters(args->tp, args->pag,
 586                                                  args->agbp,
 587                                                  -((long)(args->len)));
 588                if (error)
 589                        return error;
 590
 591                ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
 592                                              args->agbno, args->len));
 593        }
 594
 595        if (!args->isfl) {
 596                xfs_trans_mod_sb(args->tp, args->wasdel ?
 597                                 XFS_TRANS_SB_RES_FDBLOCKS :
 598                                 XFS_TRANS_SB_FDBLOCKS,
 599                                 -((long)(args->len)));
 600        }
 601
 602        XFS_STATS_INC(xs_allocx);
 603        XFS_STATS_ADD(xs_allocb, args->len);
 604        return error;
 605}
 606
 607/*
 608 * Allocate a variable extent at exactly agno/bno.
 609 * Extent's length (returned in *len) will be between minlen and maxlen,
 610 * and of the form k * prod + mod unless there's nothing that large.
 611 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 612 */
 613STATIC int                      /* error */
 614xfs_alloc_ag_vextent_exact(
 615        xfs_alloc_arg_t *args)  /* allocation argument structure */
 616{
 617        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
 618        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
 619        int             error;
 620        xfs_agblock_t   fbno;   /* start block of found extent */
 621        xfs_extlen_t    flen;   /* length of found extent */
 622        xfs_agblock_t   tbno;   /* start block of trimmed extent */
 623        xfs_extlen_t    tlen;   /* length of trimmed extent */
 624        xfs_agblock_t   tend;   /* end block of trimmed extent */
 625        int             i;      /* success/failure of operation */
 626
 627        ASSERT(args->alignment == 1);
 628
 629        /*
 630         * Allocate/initialize a cursor for the by-number freespace btree.
 631         */
 632        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 633                                          args->agno, XFS_BTNUM_BNO);
 634
 635        /*
 636         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
 637         * Look for the closest free block <= bno, it must contain bno
 638         * if any free block does.
 639         */
 640        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
 641        if (error)
 642                goto error0;
 643        if (!i)
 644                goto not_found;
 645
 646        /*
 647         * Grab the freespace record.
 648         */
 649        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
 650        if (error)
 651                goto error0;
 652        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 653        ASSERT(fbno <= args->agbno);
 654
 655        /*
 656         * Check for overlapping busy extents.
 657         */
 658        xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
 659
 660        /*
 661         * Give up if the start of the extent is busy, or the freespace isn't
 662         * long enough for the minimum request.
 663         */
 664        if (tbno > args->agbno)
 665                goto not_found;
 666        if (tlen < args->minlen)
 667                goto not_found;
 668        tend = tbno + tlen;
 669        if (tend < args->agbno + args->minlen)
 670                goto not_found;
 671
 672        /*
 673         * End of extent will be smaller of the freespace end and the
 674         * maximal requested end.
 675         *
 676         * Fix the length according to mod and prod if given.
 677         */
 678        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
 679                                                - args->agbno;
 680        xfs_alloc_fix_len(args);
 681        if (!xfs_alloc_fix_minleft(args))
 682                goto not_found;
 683
 684        ASSERT(args->agbno + args->len <= tend);
 685
 686        /*
 687         * We are allocating agbno for args->len
 688         * Allocate/initialize a cursor for the by-size btree.
 689         */
 690        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 691                args->agno, XFS_BTNUM_CNT);
 692        ASSERT(args->agbno + args->len <=
 693                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 694        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 695                                      args->len, XFSA_FIXUP_BNO_OK);
 696        if (error) {
 697                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 698                goto error0;
 699        }
 700
 701        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 702        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 703
 704        args->wasfromfl = 0;
 705        trace_xfs_alloc_exact_done(args);
 706        return 0;
 707
 708not_found:
 709        /* Didn't find it, return null. */
 710        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
 711        args->agbno = NULLAGBLOCK;
 712        trace_xfs_alloc_exact_notfound(args);
 713        return 0;
 714
 715error0:
 716        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 717        trace_xfs_alloc_exact_error(args);
 718        return error;
 719}
 720
 721/*
 722 * Search the btree in a given direction via the search cursor and compare
 723 * the records found against the good extent we've already found.
 724 */
 725STATIC int
 726xfs_alloc_find_best_extent(
 727        struct xfs_alloc_arg    *args,  /* allocation argument structure */
 728        struct xfs_btree_cur    **gcur, /* good cursor */
 729        struct xfs_btree_cur    **scur, /* searching cursor */
 730        xfs_agblock_t           gdiff,  /* difference for search comparison */
 731        xfs_agblock_t           *sbno,  /* extent found by search */
 732        xfs_extlen_t            *slen,  /* extent length */
 733        xfs_agblock_t           *sbnoa, /* aligned extent found by search */
 734        xfs_extlen_t            *slena, /* aligned extent length */
 735        int                     dir)    /* 0 = search right, 1 = search left */
 736{
 737        xfs_agblock_t           new;
 738        xfs_agblock_t           sdiff;
 739        int                     error;
 740        int                     i;
 741
 742        /* The good extent is perfect, no need to  search. */
 743        if (!gdiff)
 744                goto out_use_good;
 745
 746        /*
 747         * Look until we find a better one, run out of space or run off the end.
 748         */
 749        do {
 750                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
 751                if (error)
 752                        goto error0;
 753                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 754                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 755
 756                /*
 757                 * The good extent is closer than this one.
 758                 */
 759                if (!dir) {
 760                        if (*sbnoa >= args->agbno + gdiff)
 761                                goto out_use_good;
 762                } else {
 763                        if (*sbnoa <= args->agbno - gdiff)
 764                                goto out_use_good;
 765                }
 766
 767                /*
 768                 * Same distance, compare length and pick the best.
 769                 */
 770                if (*slena >= args->minlen) {
 771                        args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
 772                        xfs_alloc_fix_len(args);
 773
 774                        sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 775                                                       args->alignment, *sbnoa,
 776                                                       *slena, &new);
 777
 778                        /*
 779                         * Choose closer size and invalidate other cursor.
 780                         */
 781                        if (sdiff < gdiff)
 782                                goto out_use_search;
 783                        goto out_use_good;
 784                }
 785
 786                if (!dir)
 787                        error = xfs_btree_increment(*scur, 0, &i);
 788                else
 789                        error = xfs_btree_decrement(*scur, 0, &i);
 790                if (error)
 791                        goto error0;
 792        } while (i);
 793
 794out_use_good:
 795        xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
 796        *scur = NULL;
 797        return 0;
 798
 799out_use_search:
 800        xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
 801        *gcur = NULL;
 802        return 0;
 803
 804error0:
 805        /* caller invalidates cursors */
 806        return error;
 807}
 808
 809/*
 810 * Allocate a variable extent near bno in the allocation group agno.
 811 * Extent's length (returned in len) will be between minlen and maxlen,
 812 * and of the form k * prod + mod unless there's nothing that large.
 813 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 814 */
 815STATIC int                              /* error */
 816xfs_alloc_ag_vextent_near(
 817        xfs_alloc_arg_t *args)          /* allocation argument structure */
 818{
 819        xfs_btree_cur_t *bno_cur_gt;    /* cursor for bno btree, right side */
 820        xfs_btree_cur_t *bno_cur_lt;    /* cursor for bno btree, left side */
 821        xfs_btree_cur_t *cnt_cur;       /* cursor for count btree */
 822        xfs_agblock_t   gtbno;          /* start bno of right side entry */
 823        xfs_agblock_t   gtbnoa;         /* aligned ... */
 824        xfs_extlen_t    gtdiff;         /* difference to right side entry */
 825        xfs_extlen_t    gtlen;          /* length of right side entry */
 826        xfs_extlen_t    gtlena;         /* aligned ... */
 827        xfs_agblock_t   gtnew;          /* useful start bno of right side */
 828        int             error;          /* error code */
 829        int             i;              /* result code, temporary */
 830        int             j;              /* result code, temporary */
 831        xfs_agblock_t   ltbno;          /* start bno of left side entry */
 832        xfs_agblock_t   ltbnoa;         /* aligned ... */
 833        xfs_extlen_t    ltdiff;         /* difference to left side entry */
 834        xfs_extlen_t    ltlen;          /* length of left side entry */
 835        xfs_extlen_t    ltlena;         /* aligned ... */
 836        xfs_agblock_t   ltnew;          /* useful start bno of left side */
 837        xfs_extlen_t    rlen;           /* length of returned extent */
 838        int             forced = 0;
 839#if defined(DEBUG) && defined(__KERNEL__)
 840        /*
 841         * Randomly don't execute the first algorithm.
 842         */
 843        int             dofirst;        /* set to do first algorithm */
 844
 845        dofirst = random32() & 1;
 846#endif
 847
 848restart:
 849        bno_cur_lt = NULL;
 850        bno_cur_gt = NULL;
 851        ltlen = 0;
 852        gtlena = 0;
 853        ltlena = 0;
 854
 855        /*
 856         * Get a cursor for the by-size btree.
 857         */
 858        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 859                args->agno, XFS_BTNUM_CNT);
 860
 861        /*
 862         * See if there are any free extents as big as maxlen.
 863         */
 864        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
 865                goto error0;
 866        /*
 867         * If none, then pick up the last entry in the tree unless the
 868         * tree is empty.
 869         */
 870        if (!i) {
 871                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
 872                                &ltlen, &i)))
 873                        goto error0;
 874                if (i == 0 || ltlen == 0) {
 875                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 876                        trace_xfs_alloc_near_noentry(args);
 877                        return 0;
 878                }
 879                ASSERT(i == 1);
 880        }
 881        args->wasfromfl = 0;
 882
 883        /*
 884         * First algorithm.
 885         * If the requested extent is large wrt the freespaces available
 886         * in this a.g., then the cursor will be pointing to a btree entry
 887         * near the right edge of the tree.  If it's in the last btree leaf
 888         * block, then we just examine all the entries in that block
 889         * that are big enough, and pick the best one.
 890         * This is written as a while loop so we can break out of it,
 891         * but we never loop back to the top.
 892         */
 893        while (xfs_btree_islastblock(cnt_cur, 0)) {
 894                xfs_extlen_t    bdiff;
 895                int             besti=0;
 896                xfs_extlen_t    blen=0;
 897                xfs_agblock_t   bnew=0;
 898
 899#if defined(DEBUG) && defined(__KERNEL__)
 900                if (!dofirst)
 901                        break;
 902#endif
 903                /*
 904                 * Start from the entry that lookup found, sequence through
 905                 * all larger free blocks.  If we're actually pointing at a
 906                 * record smaller than maxlen, go to the start of this block,
 907                 * and skip all those smaller than minlen.
 908                 */
 909                if (ltlen || args->alignment > 1) {
 910                        cnt_cur->bc_ptrs[0] = 1;
 911                        do {
 912                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
 913                                                &ltlen, &i)))
 914                                        goto error0;
 915                                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 916                                if (ltlen >= args->minlen)
 917                                        break;
 918                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
 919                                        goto error0;
 920                        } while (i);
 921                        ASSERT(ltlen >= args->minlen);
 922                        if (!i)
 923                                break;
 924                }
 925                i = cnt_cur->bc_ptrs[0];
 926                for (j = 1, blen = 0, bdiff = 0;
 927                     !error && j && (blen < args->maxlen || bdiff > 0);
 928                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
 929                        /*
 930                         * For each entry, decide if it's better than
 931                         * the previous best entry.
 932                         */
 933                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 934                                goto error0;
 935                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 936                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
 937                                                  &ltbnoa, &ltlena);
 938                        if (ltlena < args->minlen)
 939                                continue;
 940                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 941                        xfs_alloc_fix_len(args);
 942                        ASSERT(args->len >= args->minlen);
 943                        if (args->len < blen)
 944                                continue;
 945                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
 946                                args->alignment, ltbnoa, ltlena, &ltnew);
 947                        if (ltnew != NULLAGBLOCK &&
 948                            (args->len > blen || ltdiff < bdiff)) {
 949                                bdiff = ltdiff;
 950                                bnew = ltnew;
 951                                blen = args->len;
 952                                besti = cnt_cur->bc_ptrs[0];
 953                        }
 954                }
 955                /*
 956                 * It didn't work.  We COULD be in a case where
 957                 * there's a good record somewhere, so try again.
 958                 */
 959                if (blen == 0)
 960                        break;
 961                /*
 962                 * Point at the best entry, and retrieve it again.
 963                 */
 964                cnt_cur->bc_ptrs[0] = besti;
 965                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 966                        goto error0;
 967                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 968                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 969                args->len = blen;
 970                if (!xfs_alloc_fix_minleft(args)) {
 971                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 972                        trace_xfs_alloc_near_nominleft(args);
 973                        return 0;
 974                }
 975                blen = args->len;
 976                /*
 977                 * We are allocating starting at bnew for blen blocks.
 978                 */
 979                args->agbno = bnew;
 980                ASSERT(bnew >= ltbno);
 981                ASSERT(bnew + blen <= ltbno + ltlen);
 982                /*
 983                 * Set up a cursor for the by-bno tree.
 984                 */
 985                bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
 986                        args->agbp, args->agno, XFS_BTNUM_BNO);
 987                /*
 988                 * Fix up the btree entries.
 989                 */
 990                if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
 991                                ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
 992                        goto error0;
 993                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 994                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
 995
 996                trace_xfs_alloc_near_first(args);
 997                return 0;
 998        }
 999        /*
1000         * Second algorithm.
1001         * Search in the by-bno tree to the left and to the right
1002         * simultaneously, until in each case we find a space big enough,
1003         * or run into the edge of the tree.  When we run into the edge,
1004         * we deallocate that cursor.
1005         * If both searches succeed, we compare the two spaces and pick
1006         * the better one.
1007         * With alignment, it's possible for both to fail; the upper
1008         * level algorithm that picks allocation groups for allocations
1009         * is not supposed to do this.
1010         */
1011        /*
1012         * Allocate and initialize the cursor for the leftward search.
1013         */
1014        bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1015                args->agno, XFS_BTNUM_BNO);
1016        /*
1017         * Lookup <= bno to find the leftward search's starting point.
1018         */
1019        if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
1020                goto error0;
1021        if (!i) {
1022                /*
1023                 * Didn't find anything; use this cursor for the rightward
1024                 * search.
1025                 */
1026                bno_cur_gt = bno_cur_lt;
1027                bno_cur_lt = NULL;
1028        }
1029        /*
1030         * Found something.  Duplicate the cursor for the rightward search.
1031         */
1032        else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
1033                goto error0;
1034        /*
1035         * Increment the cursor, so we will point at the entry just right
1036         * of the leftward entry if any, or to the leftmost entry.
1037         */
1038        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1039                goto error0;
1040        if (!i) {
1041                /*
1042                 * It failed, there are no rightward entries.
1043                 */
1044                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
1045                bno_cur_gt = NULL;
1046        }
1047        /*
1048         * Loop going left with the leftward cursor, right with the
1049         * rightward cursor, until either both directions give up or
1050         * we find an entry at least as big as minlen.
1051         */
1052        do {
1053                if (bno_cur_lt) {
1054                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1055                                goto error0;
1056                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1057                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
1058                                                  &ltbnoa, &ltlena);
1059                        if (ltlena >= args->minlen)
1060                                break;
1061                        if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
1062                                goto error0;
1063                        if (!i) {
1064                                xfs_btree_del_cursor(bno_cur_lt,
1065                                                     XFS_BTREE_NOERROR);
1066                                bno_cur_lt = NULL;
1067                        }
1068                }
1069                if (bno_cur_gt) {
1070                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1071                                goto error0;
1072                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1073                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
1074                                                  &gtbnoa, &gtlena);
1075                        if (gtlena >= args->minlen)
1076                                break;
1077                        if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
1078                                goto error0;
1079                        if (!i) {
1080                                xfs_btree_del_cursor(bno_cur_gt,
1081                                                     XFS_BTREE_NOERROR);
1082                                bno_cur_gt = NULL;
1083                        }
1084                }
1085        } while (bno_cur_lt || bno_cur_gt);
1086
1087        /*
1088         * Got both cursors still active, need to find better entry.
1089         */
1090        if (bno_cur_lt && bno_cur_gt) {
1091                if (ltlena >= args->minlen) {
1092                        /*
1093                         * Left side is good, look for a right side entry.
1094                         */
1095                        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1096                        xfs_alloc_fix_len(args);
1097                        ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1098                                args->alignment, ltbnoa, ltlena, &ltnew);
1099
1100                        error = xfs_alloc_find_best_extent(args,
1101                                                &bno_cur_lt, &bno_cur_gt,
1102                                                ltdiff, &gtbno, &gtlen,
1103                                                &gtbnoa, &gtlena,
1104                                                0 /* search right */);
1105                } else {
1106                        ASSERT(gtlena >= args->minlen);
1107
1108                        /*
1109                         * Right side is good, look for a left side entry.
1110                         */
1111                        args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1112                        xfs_alloc_fix_len(args);
1113                        gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1114                                args->alignment, gtbnoa, gtlena, &gtnew);
1115
1116                        error = xfs_alloc_find_best_extent(args,
1117                                                &bno_cur_gt, &bno_cur_lt,
1118                                                gtdiff, &ltbno, &ltlen,
1119                                                &ltbnoa, &ltlena,
1120                                                1 /* search left */);
1121                }
1122
1123                if (error)
1124                        goto error0;
1125        }
1126
1127        /*
1128         * If we couldn't get anything, give up.
1129         */
1130        if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1131                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1132
1133                if (!forced++) {
1134                        trace_xfs_alloc_near_busy(args);
1135                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1136                        goto restart;
1137                }
1138                trace_xfs_alloc_size_neither(args);
1139                args->agbno = NULLAGBLOCK;
1140                return 0;
1141        }
1142
1143        /*
1144         * At this point we have selected a freespace entry, either to the
1145         * left or to the right.  If it's on the right, copy all the
1146         * useful variables to the "left" set so we only have one
1147         * copy of this code.
1148         */
1149        if (bno_cur_gt) {
1150                bno_cur_lt = bno_cur_gt;
1151                bno_cur_gt = NULL;
1152                ltbno = gtbno;
1153                ltbnoa = gtbnoa;
1154                ltlen = gtlen;
1155                ltlena = gtlena;
1156                j = 1;
1157        } else
1158                j = 0;
1159
1160        /*
1161         * Fix up the length and compute the useful address.
1162         */
1163        args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1164        xfs_alloc_fix_len(args);
1165        if (!xfs_alloc_fix_minleft(args)) {
1166                trace_xfs_alloc_near_nominleft(args);
1167                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1168                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1169                return 0;
1170        }
1171        rlen = args->len;
1172        (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1173                                     ltbnoa, ltlena, &ltnew);
1174        ASSERT(ltnew >= ltbno);
1175        ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1176        ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1177        args->agbno = ltnew;
1178
1179        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1180                        ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1181                goto error0;
1182
1183        if (j)
1184                trace_xfs_alloc_near_greater(args);
1185        else
1186                trace_xfs_alloc_near_lesser(args);
1187
1188        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1189        xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1190        return 0;
1191
1192 error0:
1193        trace_xfs_alloc_near_error(args);
1194        if (cnt_cur != NULL)
1195                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1196        if (bno_cur_lt != NULL)
1197                xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1198        if (bno_cur_gt != NULL)
1199                xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1200        return error;
1201}
1202
1203/*
1204 * Allocate a variable extent anywhere in the allocation group agno.
1205 * Extent's length (returned in len) will be between minlen and maxlen,
1206 * and of the form k * prod + mod unless there's nothing that large.
1207 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1208 */
1209STATIC int                              /* error */
1210xfs_alloc_ag_vextent_size(
1211        xfs_alloc_arg_t *args)          /* allocation argument structure */
1212{
1213        xfs_btree_cur_t *bno_cur;       /* cursor for bno btree */
1214        xfs_btree_cur_t *cnt_cur;       /* cursor for cnt btree */
1215        int             error;          /* error result */
1216        xfs_agblock_t   fbno;           /* start of found freespace */
1217        xfs_extlen_t    flen;           /* length of found freespace */
1218        int             i;              /* temp status variable */
1219        xfs_agblock_t   rbno;           /* returned block number */
1220        xfs_extlen_t    rlen;           /* length of returned extent */
1221        int             forced = 0;
1222
1223restart:
1224        /*
1225         * Allocate and initialize a cursor for the by-size btree.
1226         */
1227        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1228                args->agno, XFS_BTNUM_CNT);
1229        bno_cur = NULL;
1230
1231        /*
1232         * Look for an entry >= maxlen+alignment-1 blocks.
1233         */
1234        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1235                        args->maxlen + args->alignment - 1, &i)))
1236                goto error0;
1237
1238        /*
1239         * If none or we have busy extents that we cannot allocate from, then
1240         * we have to settle for a smaller extent. In the case that there are
1241         * no large extents, this will return the last entry in the tree unless
1242         * the tree is empty. In the case that there are only busy large
1243         * extents, this will return the largest small extent unless there
1244         * are no smaller extents available.
1245         */
1246        if (!i || forced > 1) {
1247                error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1248                                                   &fbno, &flen, &i);
1249                if (error)
1250                        goto error0;
1251                if (i == 0 || flen == 0) {
1252                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1253                        trace_xfs_alloc_size_noentry(args);
1254                        return 0;
1255                }
1256                ASSERT(i == 1);
1257                xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1258        } else {
1259                /*
1260                 * Search for a non-busy extent that is large enough.
1261                 * If we are at low space, don't check, or if we fall of
1262                 * the end of the btree, turn off the busy check and
1263                 * restart.
1264                 */
1265                for (;;) {
1266                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1267                        if (error)
1268                                goto error0;
1269                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1270
1271                        xfs_alloc_compute_aligned(args, fbno, flen,
1272                                                  &rbno, &rlen);
1273
1274                        if (rlen >= args->maxlen)
1275                                break;
1276
1277                        error = xfs_btree_increment(cnt_cur, 0, &i);
1278                        if (error)
1279                                goto error0;
1280                        if (i == 0) {
1281                                /*
1282                                 * Our only valid extents must have been busy.
1283                                 * Make it unbusy by forcing the log out and
1284                                 * retrying. If we've been here before, forcing
1285                                 * the log isn't making the extents available,
1286                                 * which means they have probably been freed in
1287                                 * this transaction.  In that case, we have to
1288                                 * give up on them and we'll attempt a minlen
1289                                 * allocation the next time around.
1290                                 */
1291                                xfs_btree_del_cursor(cnt_cur,
1292                                                     XFS_BTREE_NOERROR);
1293                                trace_xfs_alloc_size_busy(args);
1294                                if (!forced++)
1295                                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1296                                goto restart;
1297                        }
1298                }
1299        }
1300
1301        /*
1302         * In the first case above, we got the last entry in the
1303         * by-size btree.  Now we check to see if the space hits maxlen
1304         * once aligned; if not, we search left for something better.
1305         * This can't happen in the second case above.
1306         */
1307        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1308        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1309                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1310        if (rlen < args->maxlen) {
1311                xfs_agblock_t   bestfbno;
1312                xfs_extlen_t    bestflen;
1313                xfs_agblock_t   bestrbno;
1314                xfs_extlen_t    bestrlen;
1315
1316                bestrlen = rlen;
1317                bestrbno = rbno;
1318                bestflen = flen;
1319                bestfbno = fbno;
1320                for (;;) {
1321                        if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1322                                goto error0;
1323                        if (i == 0)
1324                                break;
1325                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1326                                        &i)))
1327                                goto error0;
1328                        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1329                        if (flen < bestrlen)
1330                                break;
1331                        xfs_alloc_compute_aligned(args, fbno, flen,
1332                                                  &rbno, &rlen);
1333                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1334                        XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1335                                (rlen <= flen && rbno + rlen <= fbno + flen),
1336                                error0);
1337                        if (rlen > bestrlen) {
1338                                bestrlen = rlen;
1339                                bestrbno = rbno;
1340                                bestflen = flen;
1341                                bestfbno = fbno;
1342                                if (rlen == args->maxlen)
1343                                        break;
1344                        }
1345                }
1346                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1347                                &i)))
1348                        goto error0;
1349                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1350                rlen = bestrlen;
1351                rbno = bestrbno;
1352                flen = bestflen;
1353                fbno = bestfbno;
1354        }
1355        args->wasfromfl = 0;
1356        /*
1357         * Fix up the length.
1358         */
1359        args->len = rlen;
1360        if (rlen < args->minlen) {
1361                if (!forced++) {
1362                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1363                        trace_xfs_alloc_size_busy(args);
1364                        xfs_log_force(args->mp, XFS_LOG_SYNC);
1365                        goto restart;
1366                }
1367                goto out_nominleft;
1368        }
1369        xfs_alloc_fix_len(args);
1370
1371        if (!xfs_alloc_fix_minleft(args))
1372                goto out_nominleft;
1373        rlen = args->len;
1374        XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1375        /*
1376         * Allocate and initialize a cursor for the by-block tree.
1377         */
1378        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1379                args->agno, XFS_BTNUM_BNO);
1380        if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1381                        rbno, rlen, XFSA_FIXUP_CNT_OK)))
1382                goto error0;
1383        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1384        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1385        cnt_cur = bno_cur = NULL;
1386        args->len = rlen;
1387        args->agbno = rbno;
1388        XFS_WANT_CORRUPTED_GOTO(
1389                args->agbno + args->len <=
1390                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1391                error0);
1392        trace_xfs_alloc_size_done(args);
1393        return 0;
1394
1395error0:
1396        trace_xfs_alloc_size_error(args);
1397        if (cnt_cur)
1398                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1399        if (bno_cur)
1400                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1401        return error;
1402
1403out_nominleft:
1404        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1405        trace_xfs_alloc_size_nominleft(args);
1406        args->agbno = NULLAGBLOCK;
1407        return 0;
1408}
1409
1410/*
1411 * Deal with the case where only small freespaces remain.
1412 * Either return the contents of the last freespace record,
1413 * or allocate space from the freelist if there is nothing in the tree.
1414 */
1415STATIC int                      /* error */
1416xfs_alloc_ag_vextent_small(
1417        xfs_alloc_arg_t *args,  /* allocation argument structure */
1418        xfs_btree_cur_t *ccur,  /* by-size cursor */
1419        xfs_agblock_t   *fbnop, /* result block number */
1420        xfs_extlen_t    *flenp, /* result length */
1421        int             *stat)  /* status: 0-freelist, 1-normal/none */
1422{
1423        int             error;
1424        xfs_agblock_t   fbno;
1425        xfs_extlen_t    flen;
1426        int             i;
1427
1428        if ((error = xfs_btree_decrement(ccur, 0, &i)))
1429                goto error0;
1430        if (i) {
1431                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1432                        goto error0;
1433                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1434        }
1435        /*
1436         * Nothing in the btree, try the freelist.  Make sure
1437         * to respect minleft even when pulling from the
1438         * freelist.
1439         */
1440        else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1441                 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1442                  > args->minleft)) {
1443                error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1444                if (error)
1445                        goto error0;
1446                if (fbno != NULLAGBLOCK) {
1447                        xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1448                                             args->userdata);
1449
1450                        if (args->userdata) {
1451                                xfs_buf_t       *bp;
1452
1453                                bp = xfs_btree_get_bufs(args->mp, args->tp,
1454                                        args->agno, fbno, 0);
1455                                xfs_trans_binval(args->tp, bp);
1456                        }
1457                        args->len = 1;
1458                        args->agbno = fbno;
1459                        XFS_WANT_CORRUPTED_GOTO(
1460                                args->agbno + args->len <=
1461                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1462                                error0);
1463                        args->wasfromfl = 1;
1464                        trace_xfs_alloc_small_freelist(args);
1465                        *stat = 0;
1466                        return 0;
1467                }
1468                /*
1469                 * Nothing in the freelist.
1470                 */
1471                else
1472                        flen = 0;
1473        }
1474        /*
1475         * Can't allocate from the freelist for some reason.
1476         */
1477        else {
1478                fbno = NULLAGBLOCK;
1479                flen = 0;
1480        }
1481        /*
1482         * Can't do the allocation, give up.
1483         */
1484        if (flen < args->minlen) {
1485                args->agbno = NULLAGBLOCK;
1486                trace_xfs_alloc_small_notenough(args);
1487                flen = 0;
1488        }
1489        *fbnop = fbno;
1490        *flenp = flen;
1491        *stat = 1;
1492        trace_xfs_alloc_small_done(args);
1493        return 0;
1494
1495error0:
1496        trace_xfs_alloc_small_error(args);
1497        return error;
1498}
1499
1500/*
1501 * Free the extent starting at agno/bno for length.
1502 */
1503STATIC int                      /* error */
1504xfs_free_ag_extent(
1505        xfs_trans_t     *tp,    /* transaction pointer */
1506        xfs_buf_t       *agbp,  /* buffer for a.g. freelist header */
1507        xfs_agnumber_t  agno,   /* allocation group number */
1508        xfs_agblock_t   bno,    /* starting block number */
1509        xfs_extlen_t    len,    /* length of extent */
1510        int             isfl)   /* set if is freelist blocks - no sb acctg */
1511{
1512        xfs_btree_cur_t *bno_cur;       /* cursor for by-block btree */
1513        xfs_btree_cur_t *cnt_cur;       /* cursor for by-size btree */
1514        int             error;          /* error return value */
1515        xfs_agblock_t   gtbno;          /* start of right neighbor block */
1516        xfs_extlen_t    gtlen;          /* length of right neighbor block */
1517        int             haveleft;       /* have a left neighbor block */
1518        int             haveright;      /* have a right neighbor block */
1519        int             i;              /* temp, result code */
1520        xfs_agblock_t   ltbno;          /* start of left neighbor block */
1521        xfs_extlen_t    ltlen;          /* length of left neighbor block */
1522        xfs_mount_t     *mp;            /* mount point struct for filesystem */
1523        xfs_agblock_t   nbno;           /* new starting block of freespace */
1524        xfs_extlen_t    nlen;           /* new length of freespace */
1525        xfs_perag_t     *pag;           /* per allocation group data */
1526
1527        mp = tp->t_mountp;
1528        /*
1529         * Allocate and initialize a cursor for the by-block btree.
1530         */
1531        bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
1532        cnt_cur = NULL;
1533        /*
1534         * Look for a neighboring block on the left (lower block numbers)
1535         * that is contiguous with this space.
1536         */
1537        if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1538                goto error0;
1539        if (haveleft) {
1540                /*
1541                 * There is a block to our left.
1542                 */
1543                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1544                        goto error0;
1545                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1546                /*
1547                 * It's not contiguous, though.
1548                 */
1549                if (ltbno + ltlen < bno)
1550                        haveleft = 0;
1551                else {
1552                        /*
1553                         * If this failure happens the request to free this
1554                         * space was invalid, it's (partly) already free.
1555                         * Very bad.
1556                         */
1557                        XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1558                }
1559        }
1560        /*
1561         * Look for a neighboring block on the right (higher block numbers)
1562         * that is contiguous with this space.
1563         */
1564        if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
1565                goto error0;
1566        if (haveright) {
1567                /*
1568                 * There is a block to our right.
1569                 */
1570                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1571                        goto error0;
1572                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1573                /*
1574                 * It's not contiguous, though.
1575                 */
1576                if (bno + len < gtbno)
1577                        haveright = 0;
1578                else {
1579                        /*
1580                         * If this failure happens the request to free this
1581                         * space was invalid, it's (partly) already free.
1582                         * Very bad.
1583                         */
1584                        XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1585                }
1586        }
1587        /*
1588         * Now allocate and initialize a cursor for the by-size tree.
1589         */
1590        cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
1591        /*
1592         * Have both left and right contiguous neighbors.
1593         * Merge all three into a single free block.
1594         */
1595        if (haveleft && haveright) {
1596                /*
1597                 * Delete the old by-size entry on the left.
1598                 */
1599                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1600                        goto error0;
1601                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1602                if ((error = xfs_btree_delete(cnt_cur, &i)))
1603                        goto error0;
1604                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1605                /*
1606                 * Delete the old by-size entry on the right.
1607                 */
1608                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1609                        goto error0;
1610                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1611                if ((error = xfs_btree_delete(cnt_cur, &i)))
1612                        goto error0;
1613                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1614                /*
1615                 * Delete the old by-block entry for the right block.
1616                 */
1617                if ((error = xfs_btree_delete(bno_cur, &i)))
1618                        goto error0;
1619                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1620                /*
1621                 * Move the by-block cursor back to the left neighbor.
1622                 */
1623                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1624                        goto error0;
1625                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1626#ifdef DEBUG
1627                /*
1628                 * Check that this is the right record: delete didn't
1629                 * mangle the cursor.
1630                 */
1631                {
1632                        xfs_agblock_t   xxbno;
1633                        xfs_extlen_t    xxlen;
1634
1635                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1636                                        &i)))
1637                                goto error0;
1638                        XFS_WANT_CORRUPTED_GOTO(
1639                                i == 1 && xxbno == ltbno && xxlen == ltlen,
1640                                error0);
1641                }
1642#endif
1643                /*
1644                 * Update remaining by-block entry to the new, joined block.
1645                 */
1646                nbno = ltbno;
1647                nlen = len + ltlen + gtlen;
1648                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1649                        goto error0;
1650        }
1651        /*
1652         * Have only a left contiguous neighbor.
1653         * Merge it together with the new freespace.
1654         */
1655        else if (haveleft) {
1656                /*
1657                 * Delete the old by-size entry on the left.
1658                 */
1659                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1660                        goto error0;
1661                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1662                if ((error = xfs_btree_delete(cnt_cur, &i)))
1663                        goto error0;
1664                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1665                /*
1666                 * Back up the by-block cursor to the left neighbor, and
1667                 * update its length.
1668                 */
1669                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
1670                        goto error0;
1671                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1672                nbno = ltbno;
1673                nlen = len + ltlen;
1674                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1675                        goto error0;
1676        }
1677        /*
1678         * Have only a right contiguous neighbor.
1679         * Merge it together with the new freespace.
1680         */
1681        else if (haveright) {
1682                /*
1683                 * Delete the old by-size entry on the right.
1684                 */
1685                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1686                        goto error0;
1687                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1688                if ((error = xfs_btree_delete(cnt_cur, &i)))
1689                        goto error0;
1690                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1691                /*
1692                 * Update the starting block and length of the right
1693                 * neighbor in the by-block tree.
1694                 */
1695                nbno = bno;
1696                nlen = len + gtlen;
1697                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1698                        goto error0;
1699        }
1700        /*
1701         * No contiguous neighbors.
1702         * Insert the new freespace into the by-block tree.
1703         */
1704        else {
1705                nbno = bno;
1706                nlen = len;
1707                if ((error = xfs_btree_insert(bno_cur, &i)))
1708                        goto error0;
1709                XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1710        }
1711        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1712        bno_cur = NULL;
1713        /*
1714         * In all cases we need to insert the new freespace in the by-size tree.
1715         */
1716        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1717                goto error0;
1718        XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1719        if ((error = xfs_btree_insert(cnt_cur, &i)))
1720                goto error0;
1721        XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1722        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1723        cnt_cur = NULL;
1724
1725        /*
1726         * Update the freespace totals in the ag and superblock.
1727         */
1728        pag = xfs_perag_get(mp, agno);
1729        error = xfs_alloc_update_counters(tp, pag, agbp, len);
1730        xfs_perag_put(pag);
1731        if (error)
1732                goto error0;
1733
1734        if (!isfl)
1735                xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1736        XFS_STATS_INC(xs_freex);
1737        XFS_STATS_ADD(xs_freeb, len);
1738
1739        trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1740
1741        return 0;
1742
1743 error0:
1744        trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
1745        if (bno_cur)
1746                xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1747        if (cnt_cur)
1748                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1749        return error;
1750}
1751
1752/*
1753 * Visible (exported) allocation/free functions.
1754 * Some of these are used just by xfs_alloc_btree.c and this file.
1755 */
1756
1757/*
1758 * Compute and fill in value of m_ag_maxlevels.
1759 */
1760void
1761xfs_alloc_compute_maxlevels(
1762        xfs_mount_t     *mp)    /* file system mount structure */
1763{
1764        int             level;
1765        uint            maxblocks;
1766        uint            maxleafents;
1767        int             minleafrecs;
1768        int             minnoderecs;
1769
1770        maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1771        minleafrecs = mp->m_alloc_mnr[0];
1772        minnoderecs = mp->m_alloc_mnr[1];
1773        maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1774        for (level = 1; maxblocks > 1; level++)
1775                maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1776        mp->m_ag_maxlevels = level;
1777}
1778
1779/*
1780 * Find the length of the longest extent in an AG.
1781 */
1782xfs_extlen_t
1783xfs_alloc_longest_free_extent(
1784        struct xfs_mount        *mp,
1785        struct xfs_perag        *pag)
1786{
1787        xfs_extlen_t            need, delta = 0;
1788
1789        need = XFS_MIN_FREELIST_PAG(pag, mp);
1790        if (need > pag->pagf_flcount)
1791                delta = need - pag->pagf_flcount;
1792
1793        if (pag->pagf_longest > delta)
1794                return pag->pagf_longest - delta;
1795        return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1796}
1797
1798/*
1799 * Decide whether to use this allocation group for this allocation.
1800 * If so, fix up the btree freelist's size.
1801 */
1802STATIC int                      /* error */
1803xfs_alloc_fix_freelist(
1804        xfs_alloc_arg_t *args,  /* allocation argument structure */
1805        int             flags)  /* XFS_ALLOC_FLAG_... */
1806{
1807        xfs_buf_t       *agbp;  /* agf buffer pointer */
1808        xfs_agf_t       *agf;   /* a.g. freespace structure pointer */
1809        xfs_buf_t       *agflbp;/* agfl buffer pointer */
1810        xfs_agblock_t   bno;    /* freelist block */
1811        xfs_extlen_t    delta;  /* new blocks needed in freelist */
1812        int             error;  /* error result code */
1813        xfs_extlen_t    longest;/* longest extent in allocation group */
1814        xfs_mount_t     *mp;    /* file system mount point structure */
1815        xfs_extlen_t    need;   /* total blocks needed in freelist */
1816        xfs_perag_t     *pag;   /* per-ag information structure */
1817        xfs_alloc_arg_t targs;  /* local allocation arguments */
1818        xfs_trans_t     *tp;    /* transaction pointer */
1819
1820        mp = args->mp;
1821
1822        pag = args->pag;
1823        tp = args->tp;
1824        if (!pag->pagf_init) {
1825                if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1826                                &agbp)))
1827                        return error;
1828                if (!pag->pagf_init) {
1829                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1830                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1831                        args->agbp = NULL;
1832                        return 0;
1833                }
1834        } else
1835                agbp = NULL;
1836
1837        /*
1838         * If this is a metadata preferred pag and we are user data
1839         * then try somewhere else if we are not being asked to
1840         * try harder at this point
1841         */
1842        if (pag->pagf_metadata && args->userdata &&
1843            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1844                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1845                args->agbp = NULL;
1846                return 0;
1847        }
1848
1849        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1850                /*
1851                 * If it looks like there isn't a long enough extent, or enough
1852                 * total blocks, reject it.
1853                 */
1854                need = XFS_MIN_FREELIST_PAG(pag, mp);
1855                longest = xfs_alloc_longest_free_extent(mp, pag);
1856                if ((args->minlen + args->alignment + args->minalignslop - 1) >
1857                                longest ||
1858                    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1859                           need - args->total) < (int)args->minleft)) {
1860                        if (agbp)
1861                                xfs_trans_brelse(tp, agbp);
1862                        args->agbp = NULL;
1863                        return 0;
1864                }
1865        }
1866
1867        /*
1868         * Get the a.g. freespace buffer.
1869         * Can fail if we're not blocking on locks, and it's held.
1870         */
1871        if (agbp == NULL) {
1872                if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1873                                &agbp)))
1874                        return error;
1875                if (agbp == NULL) {
1876                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1877                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
1878                        args->agbp = NULL;
1879                        return 0;
1880                }
1881        }
1882        /*
1883         * Figure out how many blocks we should have in the freelist.
1884         */
1885        agf = XFS_BUF_TO_AGF(agbp);
1886        need = XFS_MIN_FREELIST(agf, mp);
1887        /*
1888         * If there isn't enough total or single-extent, reject it.
1889         */
1890        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1891                delta = need > be32_to_cpu(agf->agf_flcount) ?
1892                        (need - be32_to_cpu(agf->agf_flcount)) : 0;
1893                longest = be32_to_cpu(agf->agf_longest);
1894                longest = (longest > delta) ? (longest - delta) :
1895                        (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1896                if ((args->minlen + args->alignment + args->minalignslop - 1) >
1897                                longest ||
1898                    ((int)(be32_to_cpu(agf->agf_freeblks) +
1899                     be32_to_cpu(agf->agf_flcount) - need - args->total) <
1900                                (int)args->minleft)) {
1901                        xfs_trans_brelse(tp, agbp);
1902                        args->agbp = NULL;
1903                        return 0;
1904                }
1905        }
1906        /*
1907         * Make the freelist shorter if it's too long.
1908         */
1909        while (be32_to_cpu(agf->agf_flcount) > need) {
1910                xfs_buf_t       *bp;
1911
1912                error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1913                if (error)
1914                        return error;
1915                if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1916                        return error;
1917                bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1918                xfs_trans_binval(tp, bp);
1919        }
1920        /*
1921         * Initialize the args structure.
1922         */
1923        memset(&targs, 0, sizeof(targs));
1924        targs.tp = tp;
1925        targs.mp = mp;
1926        targs.agbp = agbp;
1927        targs.agno = args->agno;
1928        targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1929                targs.minalignslop = 0;
1930        targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1931        targs.type = XFS_ALLOCTYPE_THIS_AG;
1932        targs.pag = pag;
1933        if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1934                return error;
1935        /*
1936         * Make the freelist longer if it's too short.
1937         */
1938        while (be32_to_cpu(agf->agf_flcount) < need) {
1939                targs.agbno = 0;
1940                targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
1941                /*
1942                 * Allocate as many blocks as possible at once.
1943                 */
1944                if ((error = xfs_alloc_ag_vextent(&targs))) {
1945                        xfs_trans_brelse(tp, agflbp);
1946                        return error;
1947                }
1948                /*
1949                 * Stop if we run out.  Won't happen if callers are obeying
1950                 * the restrictions correctly.  Can happen for free calls
1951                 * on a completely full ag.
1952                 */
1953                if (targs.agbno == NULLAGBLOCK) {
1954                        if (flags & XFS_ALLOC_FLAG_FREEING)
1955                                break;
1956                        xfs_trans_brelse(tp, agflbp);
1957                        args->agbp = NULL;
1958                        return 0;
1959                }
1960                /*
1961                 * Put each allocated block on the list.
1962                 */
1963                for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1964                        error = xfs_alloc_put_freelist(tp, agbp,
1965                                                        agflbp, bno, 0);
1966                        if (error)
1967                                return error;
1968                }
1969        }
1970        xfs_trans_brelse(tp, agflbp);
1971        args->agbp = agbp;
1972        return 0;
1973}
1974
1975/*
1976 * Get a block from the freelist.
1977 * Returns with the buffer for the block gotten.
1978 */
1979int                             /* error */
1980xfs_alloc_get_freelist(
1981        xfs_trans_t     *tp,    /* transaction pointer */
1982        xfs_buf_t       *agbp,  /* buffer containing the agf structure */
1983        xfs_agblock_t   *bnop,  /* block address retrieved from freelist */
1984        int             btreeblk) /* destination is a AGF btree */
1985{
1986        xfs_agf_t       *agf;   /* a.g. freespace structure */
1987        xfs_agfl_t      *agfl;  /* a.g. freelist structure */
1988        xfs_buf_t       *agflbp;/* buffer for a.g. freelist structure */
1989        xfs_agblock_t   bno;    /* block number returned */
1990        int             error;
1991        int             logflags;
1992        xfs_mount_t     *mp;    /* mount structure */
1993        xfs_perag_t     *pag;   /* per allocation group data */
1994
1995        agf = XFS_BUF_TO_AGF(agbp);
1996        /*
1997         * Freelist is empty, give up.
1998         */
1999        if (!agf->agf_flcount) {
2000                *bnop = NULLAGBLOCK;
2001                return 0;
2002        }
2003        /*
2004         * Read the array of free blocks.
2005         */
2006        mp = tp->t_mountp;
2007        if ((error = xfs_alloc_read_agfl(mp, tp,
2008                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2009                return error;
2010        agfl = XFS_BUF_TO_AGFL(agflbp);
2011        /*
2012         * Get the block number and update the data structures.
2013         */
2014        bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2015        be32_add_cpu(&agf->agf_flfirst, 1);
2016        xfs_trans_brelse(tp, agflbp);
2017        if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
2018                agf->agf_flfirst = 0;
2019
2020        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2021        be32_add_cpu(&agf->agf_flcount, -1);
2022        xfs_trans_agflist_delta(tp, -1);
2023        pag->pagf_flcount--;
2024        xfs_perag_put(pag);
2025
2026        logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2027        if (btreeblk) {
2028                be32_add_cpu(&agf->agf_btreeblks, 1);
2029                pag->pagf_btreeblks++;
2030                logflags |= XFS_AGF_BTREEBLKS;
2031        }
2032
2033        xfs_alloc_log_agf(tp, agbp, logflags);
2034        *bnop = bno;
2035
2036        return 0;
2037}
2038
2039/*
2040 * Log the given fields from the agf structure.
2041 */
2042void
2043xfs_alloc_log_agf(
2044        xfs_trans_t     *tp,    /* transaction pointer */
2045        xfs_buf_t       *bp,    /* buffer for a.g. freelist header */
2046        int             fields) /* mask of fields to be logged (XFS_AGF_...) */
2047{
2048        int     first;          /* first byte offset */
2049        int     last;           /* last byte offset */
2050        static const short      offsets[] = {
2051                offsetof(xfs_agf_t, agf_magicnum),
2052                offsetof(xfs_agf_t, agf_versionnum),
2053                offsetof(xfs_agf_t, agf_seqno),
2054                offsetof(xfs_agf_t, agf_length),
2055                offsetof(xfs_agf_t, agf_roots[0]),
2056                offsetof(xfs_agf_t, agf_levels[0]),
2057                offsetof(xfs_agf_t, agf_flfirst),
2058                offsetof(xfs_agf_t, agf_fllast),
2059                offsetof(xfs_agf_t, agf_flcount),
2060                offsetof(xfs_agf_t, agf_freeblks),
2061                offsetof(xfs_agf_t, agf_longest),
2062                offsetof(xfs_agf_t, agf_btreeblks),
2063                sizeof(xfs_agf_t)
2064        };
2065
2066        trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2067
2068        xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2069        xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2070}
2071
2072/*
2073 * Interface for inode allocation to force the pag data to be initialized.
2074 */
2075int                                     /* error */
2076xfs_alloc_pagf_init(
2077        xfs_mount_t             *mp,    /* file system mount structure */
2078        xfs_trans_t             *tp,    /* transaction pointer */
2079        xfs_agnumber_t          agno,   /* allocation group number */
2080        int                     flags)  /* XFS_ALLOC_FLAGS_... */
2081{
2082        xfs_buf_t               *bp;
2083        int                     error;
2084
2085        if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2086                return error;
2087        if (bp)
2088                xfs_trans_brelse(tp, bp);
2089        return 0;
2090}
2091
2092/*
2093 * Put the block on the freelist for the allocation group.
2094 */
2095int                                     /* error */
2096xfs_alloc_put_freelist(
2097        xfs_trans_t             *tp,    /* transaction pointer */
2098        xfs_buf_t               *agbp,  /* buffer for a.g. freelist header */
2099        xfs_buf_t               *agflbp,/* buffer for a.g. free block array */
2100        xfs_agblock_t           bno,    /* block being freed */
2101        int                     btreeblk) /* block came from a AGF btree */
2102{
2103        xfs_agf_t               *agf;   /* a.g. freespace structure */
2104        xfs_agfl_t              *agfl;  /* a.g. free block array */
2105        __be32                  *blockp;/* pointer to array entry */
2106        int                     error;
2107        int                     logflags;
2108        xfs_mount_t             *mp;    /* mount structure */
2109        xfs_perag_t             *pag;   /* per allocation group data */
2110
2111        agf = XFS_BUF_TO_AGF(agbp);
2112        mp = tp->t_mountp;
2113
2114        if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2115                        be32_to_cpu(agf->agf_seqno), &agflbp)))
2116                return error;
2117        agfl = XFS_BUF_TO_AGFL(agflbp);
2118        be32_add_cpu(&agf->agf_fllast, 1);
2119        if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
2120                agf->agf_fllast = 0;
2121
2122        pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
2123        be32_add_cpu(&agf->agf_flcount, 1);
2124        xfs_trans_agflist_delta(tp, 1);
2125        pag->pagf_flcount++;
2126
2127        logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2128        if (btreeblk) {
2129                be32_add_cpu(&agf->agf_btreeblks, -1);
2130                pag->pagf_btreeblks--;
2131                logflags |= XFS_AGF_BTREEBLKS;
2132        }
2133        xfs_perag_put(pag);
2134
2135        xfs_alloc_log_agf(tp, agbp, logflags);
2136
2137        ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2138        blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
2139        *blockp = cpu_to_be32(bno);
2140        xfs_alloc_log_agf(tp, agbp, logflags);
2141        xfs_trans_log_buf(tp, agflbp,
2142                (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2143                (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2144                        sizeof(xfs_agblock_t) - 1));
2145        return 0;
2146}
2147
2148static void
2149xfs_agf_verify(
2150        struct xfs_buf  *bp)
2151 {
2152        struct xfs_mount *mp = bp->b_target->bt_mount;
2153        struct xfs_agf  *agf;
2154        int             agf_ok;
2155
2156        agf = XFS_BUF_TO_AGF(bp);
2157
2158        agf_ok = agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2159                XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2160                be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2161                be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2162                be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
2163                be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp);
2164
2165        /*
2166         * during growfs operations, the perag is not fully initialised,
2167         * so we can't use it for any useful checking. growfs ensures we can't
2168         * use it by using uncached buffers that don't have the perag attached
2169         * so we can detect and avoid this problem.
2170         */
2171        if (bp->b_pag)
2172                agf_ok = agf_ok && be32_to_cpu(agf->agf_seqno) ==
2173                                                bp->b_pag->pag_agno;
2174
2175        if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2176                agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2177                                                be32_to_cpu(agf->agf_length);
2178
2179        if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2180                        XFS_RANDOM_ALLOC_READ_AGF))) {
2181                XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agf);
2182                xfs_buf_ioerror(bp, EFSCORRUPTED);
2183        }
2184}
2185
2186static void
2187xfs_agf_read_verify(
2188        struct xfs_buf  *bp)
2189{
2190        xfs_agf_verify(bp);
2191}
2192
2193static void
2194xfs_agf_write_verify(
2195        struct xfs_buf  *bp)
2196{
2197        xfs_agf_verify(bp);
2198}
2199
2200const struct xfs_buf_ops xfs_agf_buf_ops = {
2201        .verify_read = xfs_agf_read_verify,
2202        .verify_write = xfs_agf_write_verify,
2203};
2204
2205/*
2206 * Read in the allocation group header (free/alloc section).
2207 */
2208int                                     /* error */
2209xfs_read_agf(
2210        struct xfs_mount        *mp,    /* mount point structure */
2211        struct xfs_trans        *tp,    /* transaction pointer */
2212        xfs_agnumber_t          agno,   /* allocation group number */
2213        int                     flags,  /* XFS_BUF_ */
2214        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2215{
2216        int             error;
2217
2218        ASSERT(agno != NULLAGNUMBER);
2219        error = xfs_trans_read_buf(
2220                        mp, tp, mp->m_ddev_targp,
2221                        XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2222                        XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
2223        if (error)
2224                return error;
2225        if (!*bpp)
2226                return 0;
2227
2228        ASSERT(!(*bpp)->b_error);
2229        xfs_buf_set_ref(*bpp, XFS_AGF_REF);
2230        return 0;
2231}
2232
2233/*
2234 * Read in the allocation group header (free/alloc section).
2235 */
2236int                                     /* error */
2237xfs_alloc_read_agf(
2238        struct xfs_mount        *mp,    /* mount point structure */
2239        struct xfs_trans        *tp,    /* transaction pointer */
2240        xfs_agnumber_t          agno,   /* allocation group number */
2241        int                     flags,  /* XFS_ALLOC_FLAG_... */
2242        struct xfs_buf          **bpp)  /* buffer for the ag freelist header */
2243{
2244        struct xfs_agf          *agf;           /* ag freelist header */
2245        struct xfs_perag        *pag;           /* per allocation group data */
2246        int                     error;
2247
2248        ASSERT(agno != NULLAGNUMBER);
2249
2250        error = xfs_read_agf(mp, tp, agno,
2251                        (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
2252                        bpp);
2253        if (error)
2254                return error;
2255        if (!*bpp)
2256                return 0;
2257        ASSERT(!(*bpp)->b_error);
2258
2259        agf = XFS_BUF_TO_AGF(*bpp);
2260        pag = xfs_perag_get(mp, agno);
2261        if (!pag->pagf_init) {
2262                pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
2263                pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
2264                pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2265                pag->pagf_longest = be32_to_cpu(agf->agf_longest);
2266                pag->pagf_levels[XFS_BTNUM_BNOi] =
2267                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
2268                pag->pagf_levels[XFS_BTNUM_CNTi] =
2269                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
2270                spin_lock_init(&pag->pagb_lock);
2271                pag->pagb_count = 0;
2272                pag->pagb_tree = RB_ROOT;
2273                pag->pagf_init = 1;
2274        }
2275#ifdef DEBUG
2276        else if (!XFS_FORCED_SHUTDOWN(mp)) {
2277                ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
2278                ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
2279                ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2280                ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
2281                ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2282                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
2283                ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2284                       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
2285        }
2286#endif
2287        xfs_perag_put(pag);
2288        return 0;
2289}
2290
2291/*
2292 * Allocate an extent (variable-size).
2293 * Depending on the allocation type, we either look in a single allocation
2294 * group or loop over the allocation groups to find the result.
2295 */
2296int                             /* error */
2297xfs_alloc_vextent(
2298        xfs_alloc_arg_t *args)  /* allocation argument structure */
2299{
2300        xfs_agblock_t   agsize; /* allocation group size */
2301        int             error;
2302        int             flags;  /* XFS_ALLOC_FLAG_... locking flags */
2303        xfs_extlen_t    minleft;/* minimum left value, temp copy */
2304        xfs_mount_t     *mp;    /* mount structure pointer */
2305        xfs_agnumber_t  sagno;  /* starting allocation group number */
2306        xfs_alloctype_t type;   /* input allocation type */
2307        int             bump_rotor = 0;
2308        int             no_min = 0;
2309        xfs_agnumber_t  rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2310
2311        mp = args->mp;
2312        type = args->otype = args->type;
2313        args->agbno = NULLAGBLOCK;
2314        /*
2315         * Just fix this up, for the case where the last a.g. is shorter
2316         * (or there's only one a.g.) and the caller couldn't easily figure
2317         * that out (xfs_bmap_alloc).
2318         */
2319        agsize = mp->m_sb.sb_agblocks;
2320        if (args->maxlen > agsize)
2321                args->maxlen = agsize;
2322        if (args->alignment == 0)
2323                args->alignment = 1;
2324        ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2325        ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2326        ASSERT(args->minlen <= args->maxlen);
2327        ASSERT(args->minlen <= agsize);
2328        ASSERT(args->mod < args->prod);
2329        if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2330            XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2331            args->minlen > args->maxlen || args->minlen > agsize ||
2332            args->mod >= args->prod) {
2333                args->fsbno = NULLFSBLOCK;
2334                trace_xfs_alloc_vextent_badargs(args);
2335                return 0;
2336        }
2337        minleft = args->minleft;
2338
2339        switch (type) {
2340        case XFS_ALLOCTYPE_THIS_AG:
2341        case XFS_ALLOCTYPE_NEAR_BNO:
2342        case XFS_ALLOCTYPE_THIS_BNO:
2343                /*
2344                 * These three force us into a single a.g.
2345                 */
2346                args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2347                args->pag = xfs_perag_get(mp, args->agno);
2348                args->minleft = 0;
2349                error = xfs_alloc_fix_freelist(args, 0);
2350                args->minleft = minleft;
2351                if (error) {
2352                        trace_xfs_alloc_vextent_nofix(args);
2353                        goto error0;
2354                }
2355                if (!args->agbp) {
2356                        trace_xfs_alloc_vextent_noagbp(args);
2357                        break;
2358                }
2359                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2360                if ((error = xfs_alloc_ag_vextent(args)))
2361                        goto error0;
2362                break;
2363        case XFS_ALLOCTYPE_START_BNO:
2364                /*
2365                 * Try near allocation first, then anywhere-in-ag after
2366                 * the first a.g. fails.
2367                 */
2368                if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
2369                    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2370                        args->fsbno = XFS_AGB_TO_FSB(mp,
2371                                        ((mp->m_agfrotor / rotorstep) %
2372                                        mp->m_sb.sb_agcount), 0);
2373                        bump_rotor = 1;
2374                }
2375                args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2376                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2377                /* FALLTHROUGH */
2378        case XFS_ALLOCTYPE_ANY_AG:
2379        case XFS_ALLOCTYPE_START_AG:
2380        case XFS_ALLOCTYPE_FIRST_AG:
2381                /*
2382                 * Rotate through the allocation groups looking for a winner.
2383                 */
2384                if (type == XFS_ALLOCTYPE_ANY_AG) {
2385                        /*
2386                         * Start with the last place we left off.
2387                         */
2388                        args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2389                                        mp->m_sb.sb_agcount;
2390                        args->type = XFS_ALLOCTYPE_THIS_AG;
2391                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2392                } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2393                        /*
2394                         * Start with allocation group given by bno.
2395                         */
2396                        args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2397                        args->type = XFS_ALLOCTYPE_THIS_AG;
2398                        sagno = 0;
2399                        flags = 0;
2400                } else {
2401                        if (type == XFS_ALLOCTYPE_START_AG)
2402                                args->type = XFS_ALLOCTYPE_THIS_AG;
2403                        /*
2404                         * Start with the given allocation group.
2405                         */
2406                        args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2407                        flags = XFS_ALLOC_FLAG_TRYLOCK;
2408                }
2409                /*
2410                 * Loop over allocation groups twice; first time with
2411                 * trylock set, second time without.
2412                 */
2413                for (;;) {
2414                        args->pag = xfs_perag_get(mp, args->agno);
2415                        if (no_min) args->minleft = 0;
2416                        error = xfs_alloc_fix_freelist(args, flags);
2417                        args->minleft = minleft;
2418                        if (error) {
2419                                trace_xfs_alloc_vextent_nofix(args);
2420                                goto error0;
2421                        }
2422                        /*
2423                         * If we get a buffer back then the allocation will fly.
2424                         */
2425                        if (args->agbp) {
2426                                if ((error = xfs_alloc_ag_vextent(args)))
2427                                        goto error0;
2428                                break;
2429                        }
2430
2431                        trace_xfs_alloc_vextent_loopfailed(args);
2432
2433                        /*
2434                         * Didn't work, figure out the next iteration.
2435                         */
2436                        if (args->agno == sagno &&
2437                            type == XFS_ALLOCTYPE_START_BNO)
2438                                args->type = XFS_ALLOCTYPE_THIS_AG;
2439                        /*
2440                        * For the first allocation, we can try any AG to get
2441                        * space.  However, if we already have allocated a
2442                        * block, we don't want to try AGs whose number is below
2443                        * sagno. Otherwise, we may end up with out-of-order
2444                        * locking of AGF, which might cause deadlock.
2445                        */
2446                        if (++(args->agno) == mp->m_sb.sb_agcount) {
2447                                if (args->firstblock != NULLFSBLOCK)
2448                                        args->agno = sagno;
2449                                else
2450                                        args->agno = 0;
2451                        }
2452                        /*
2453                         * Reached the starting a.g., must either be done
2454                         * or switch to non-trylock mode.
2455                         */
2456                        if (args->agno == sagno) {
2457                                if (no_min == 1) {
2458                                        args->agbno = NULLAGBLOCK;
2459                                        trace_xfs_alloc_vextent_allfailed(args);
2460                                        break;
2461                                }
2462                                if (flags == 0) {
2463                                        no_min = 1;
2464                                } else {
2465                                        flags = 0;
2466                                        if (type == XFS_ALLOCTYPE_START_BNO) {
2467                                                args->agbno = XFS_FSB_TO_AGBNO(mp,
2468                                                        args->fsbno);
2469                                                args->type = XFS_ALLOCTYPE_NEAR_BNO;
2470                                        }
2471                                }
2472                        }
2473                        xfs_perag_put(args->pag);
2474                }
2475                if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2476                        if (args->agno == sagno)
2477                                mp->m_agfrotor = (mp->m_agfrotor + 1) %
2478                                        (mp->m_sb.sb_agcount * rotorstep);
2479                        else
2480                                mp->m_agfrotor = (args->agno * rotorstep + 1) %
2481                                        (mp->m_sb.sb_agcount * rotorstep);
2482                }
2483                break;
2484        default:
2485                ASSERT(0);
2486                /* NOTREACHED */
2487        }
2488        if (args->agbno == NULLAGBLOCK)
2489                args->fsbno = NULLFSBLOCK;
2490        else {
2491                args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2492#ifdef DEBUG
2493                ASSERT(args->len >= args->minlen);
2494                ASSERT(args->len <= args->maxlen);
2495                ASSERT(args->agbno % args->alignment == 0);
2496                XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2497                        args->len);
2498#endif
2499        }
2500        xfs_perag_put(args->pag);
2501        return 0;
2502error0:
2503        xfs_perag_put(args->pag);
2504        return error;
2505}
2506
2507/*
2508 * Free an extent.
2509 * Just break up the extent address and hand off to xfs_free_ag_extent
2510 * after fixing up the freelist.
2511 */
2512int                             /* error */
2513xfs_free_extent(
2514        xfs_trans_t     *tp,    /* transaction pointer */
2515        xfs_fsblock_t   bno,    /* starting block number of extent */
2516        xfs_extlen_t    len)    /* length of extent */
2517{
2518        xfs_alloc_arg_t args;
2519        int             error;
2520
2521        ASSERT(len != 0);
2522        memset(&args, 0, sizeof(xfs_alloc_arg_t));
2523        args.tp = tp;
2524        args.mp = tp->t_mountp;
2525
2526        /*
2527         * validate that the block number is legal - the enables us to detect
2528         * and handle a silent filesystem corruption rather than crashing.
2529         */
2530        args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2531        if (args.agno >= args.mp->m_sb.sb_agcount)
2532                return EFSCORRUPTED;
2533
2534        args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2535        if (args.agbno >= args.mp->m_sb.sb_agblocks)
2536                return EFSCORRUPTED;
2537
2538        args.pag = xfs_perag_get(args.mp, args.agno);
2539        ASSERT(args.pag);
2540
2541        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2542        if (error)
2543                goto error0;
2544
2545        /* validate the extent size is legal now we have the agf locked */
2546        if (args.agbno + len >
2547                        be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2548                error = EFSCORRUPTED;
2549                goto error0;
2550        }
2551
2552        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2553        if (!error)
2554                xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0);
2555error0:
2556        xfs_perag_put(args.pag);
2557        return error;
2558}
2559
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.