darwin-xnu/bsd/hfs/hfs_readwrite.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3 *
   4 * @APPLE_LICENSE_HEADER_START@
   5 * 
   6 * The contents of this file constitute Original Code as defined in and
   7 * are subject to the Apple Public Source License Version 1.1 (the
   8 * "License").  You may not use this file except in compliance with the
   9 * License.  Please obtain a copy of the License at
  10 * http://www.apple.com/publicsource and read it before using this file.
  11 * 
  12 * This Original Code and all software distributed under the License are
  13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17 * License for the specific language governing rights and limitations
  18 * under the License.
  19 * 
  20 * @APPLE_LICENSE_HEADER_END@
  21 */
  22/*      @(#)hfs_readwrite.c     1.0
  23 *
  24 *      (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
  25 *      
  26 *      hfs_readwrite.c -- vnode operations to deal with reading and writing files.
  27 *
  28 */
  29
  30#include <sys/param.h>
  31#include <sys/systm.h>
  32#include <sys/resourcevar.h>
  33#include <sys/kernel.h>
  34#include <sys/fcntl.h>
  35#include <sys/filedesc.h>
  36#include <sys/stat.h>
  37#include <sys/buf.h>
  38#include <sys/proc.h>
  39#include <sys/kauth.h>
  40#include <sys/vnode.h>
  41#include <sys/uio.h>
  42#include <sys/vfs_context.h>
  43
  44#include <miscfs/specfs/specdev.h>
  45
  46#include <sys/ubc.h>
  47#include <vm/vm_pageout.h>
  48#include <vm/vm_kern.h>
  49
  50#include <sys/kdebug.h>
  51
  52#include        "hfs.h"
  53#include        "hfs_endian.h"
  54#include  "hfs_fsctl.h"
  55#include        "hfs_quota.h"
  56#include        "hfscommon/headers/FileMgrInternal.h"
  57#include        "hfscommon/headers/BTreesInternal.h"
  58#include        "hfs_cnode.h"
  59#include        "hfs_dbg.h"
  60
  61extern int overflow_extents(struct filefork *fp);
  62
  63#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
  64
  65enum {
  66        MAXHFSFILESIZE = 0x7FFFFFFF             /* this needs to go in the mount structure */
  67};
  68
  69extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
  70
  71extern int  hfs_setextendedsecurity(struct hfsmount *, int);
  72
  73
  74static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
  75static int  hfs_clonefile(struct vnode *, int, int, int);
  76static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
  77
  78
  79/*****************************************************************************
  80*
  81*       I/O Operations on vnodes
  82*
  83*****************************************************************************/
  84int  hfs_vnop_read(struct vnop_read_args *);
  85int  hfs_vnop_write(struct vnop_write_args *);
  86int  hfs_vnop_ioctl(struct vnop_ioctl_args *);
  87int  hfs_vnop_select(struct vnop_select_args *);
  88int  hfs_vnop_blktooff(struct vnop_blktooff_args *);
  89int  hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
  90int  hfs_vnop_blockmap(struct vnop_blockmap_args *);
  91int  hfs_vnop_strategy(struct vnop_strategy_args *);
  92int  hfs_vnop_allocate(struct vnop_allocate_args *);
  93int  hfs_vnop_pagein(struct vnop_pagein_args *);
  94int  hfs_vnop_pageout(struct vnop_pageout_args *);
  95int  hfs_vnop_bwrite(struct vnop_bwrite_args *);
  96
  97
  98/*
  99 * Read data from a file.
 100 */
 101int
 102hfs_vnop_read(struct vnop_read_args *ap)
 103{
 104        uio_t uio = ap->a_uio;
 105        struct vnode *vp = ap->a_vp;
 106        struct cnode *cp;
 107        struct filefork *fp;
 108        struct hfsmount *hfsmp;
 109        off_t filesize;
 110        off_t filebytes;
 111        off_t start_resid = uio_resid(uio);
 112        off_t offset = uio_offset(uio);
 113        int retval = 0;
 114
 115
 116        /* Preflight checks */
 117        if (!vnode_isreg(vp)) {
 118                /* can only read regular files */
 119                if (vnode_isdir(vp))
 120                        return (EISDIR);
 121                else
 122                        return (EPERM);
 123        }
 124        if (start_resid == 0)
 125                return (0);             /* Nothing left to do */
 126        if (offset < 0)
 127                return (EINVAL);        /* cant read from a negative offset */
 128
 129        cp = VTOC(vp);
 130        fp = VTOF(vp);
 131        hfsmp = VTOHFS(vp);
 132
 133        /* Protect against a size change. */
 134        hfs_lock_truncate(cp, 0);
 135
 136        filesize = fp->ff_size;
 137        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 138        if (offset > filesize) {
 139                if ((hfsmp->hfs_flags & HFS_STANDARD) &&
 140                    (offset > (off_t)MAXHFSFILESIZE)) {
 141                        retval = EFBIG;
 142                }
 143                goto exit;
 144        }
 145
 146        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
 147                (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
 148
 149        retval = cluster_read(vp, uio, filesize, 0);
 150
 151        cp->c_touch_acctime = TRUE;
 152
 153        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
 154                (int)uio_offset(uio), uio_resid(uio), (int)filesize,  (int)filebytes, 0);
 155
 156        /*
 157         * Keep track blocks read
 158         */
 159        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
 160                int took_cnode_lock = 0;
 161                off_t bytesread;
 162
 163                bytesread = start_resid - uio_resid(uio);
 164
 165                /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
 166                if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
 167                        hfs_lock(cp, HFS_FORCE_LOCK);
 168                        took_cnode_lock = 1;
 169                }
 170                /*
 171                 * If this file hasn't been seen since the start of
 172                 * the current sampling period then start over.
 173                 */
 174                if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
 175                        struct timeval tv;
 176
 177                        fp->ff_bytesread = bytesread;
 178                        microtime(&tv);
 179                        cp->c_atime = tv.tv_sec;
 180                } else {
 181                        fp->ff_bytesread += bytesread;
 182                }
 183                if (took_cnode_lock)
 184                        hfs_unlock(cp);
 185        }
 186exit:
 187        hfs_unlock_truncate(cp);
 188        return (retval);
 189}
 190
 191/*
 192 * Write data to a file.
 193 */
 194int
 195hfs_vnop_write(struct vnop_write_args *ap)
 196{
 197        uio_t uio = ap->a_uio;
 198        struct vnode *vp = ap->a_vp;
 199        struct cnode *cp;
 200        struct filefork *fp;
 201        struct hfsmount *hfsmp;
 202        kauth_cred_t cred = NULL;
 203        off_t origFileSize;
 204        off_t writelimit;
 205        off_t bytesToAdd;
 206        off_t actualBytesAdded;
 207        off_t filebytes;
 208        off_t offset;
 209        size_t resid;
 210        int eflags;
 211        int ioflag = ap->a_ioflag;
 212        int retval = 0;
 213        int lockflags;
 214        int cnode_locked = 0;
 215
 216        // LP64todo - fix this! uio_resid may be 64-bit value
 217        resid = uio_resid(uio);
 218        offset = uio_offset(uio);
 219
 220        if (offset < 0)
 221                return (EINVAL);
 222        if (resid == 0)
 223                return (E_NONE);
 224        if (!vnode_isreg(vp))
 225                return (EPERM);  /* Can only write regular files */
 226
 227        /* Protect against a size change. */
 228        hfs_lock_truncate(VTOC(vp), TRUE);
 229
 230        if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
 231                hfs_unlock_truncate(VTOC(vp));
 232                return (retval);
 233        }
 234        cnode_locked = 1;
 235        cp = VTOC(vp);
 236        fp = VTOF(vp);
 237        hfsmp = VTOHFS(vp);
 238        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 239
 240        if (ioflag & IO_APPEND) {
 241                uio_setoffset(uio, fp->ff_size);
 242                offset = fp->ff_size;
 243        }
 244        if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
 245                retval = EPERM;
 246                goto exit;
 247        }
 248
 249        origFileSize = fp->ff_size;
 250        eflags = kEFDeferMask;  /* defer file block allocations */
 251
 252#ifdef HFS_SPARSE_DEV
 253        /* 
 254         * When the underlying device is sparse and space
 255         * is low (< 8MB), stop doing delayed allocations
 256         * and begin doing synchronous I/O.
 257         */
 258        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
 259            (hfs_freeblks(hfsmp, 0) < 2048)) {
 260                eflags &= ~kEFDeferMask;
 261                ioflag |= IO_SYNC;
 262        }
 263#endif /* HFS_SPARSE_DEV */
 264
 265        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
 266                (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
 267
 268        /* Now test if we need to extend the file */
 269        /* Doing so will adjust the filebytes for us */
 270
 271        writelimit = offset + resid;
 272        if (writelimit <= filebytes)
 273                goto sizeok;
 274
 275        cred = vfs_context_ucred(ap->a_context);
 276#if QUOTA
 277        bytesToAdd = writelimit - filebytes;
 278        retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)), 
 279                           cred, 0);
 280        if (retval)
 281                goto exit;
 282#endif /* QUOTA */
 283
 284        if (hfs_start_transaction(hfsmp) != 0) {
 285                retval = EINVAL;
 286                goto exit;
 287        }
 288
 289        while (writelimit > filebytes) {
 290                bytesToAdd = writelimit - filebytes;
 291                if (cred && suser(cred, NULL) != 0)
 292                        eflags |= kEFReserveMask;
 293
 294                /* Protect extents b-tree and allocation bitmap */
 295                lockflags = SFL_BITMAP;
 296                if (overflow_extents(fp))
 297                        lockflags |= SFL_EXTENTS;
 298                lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 299        
 300                /* Files that are changing size are not hot file candidates. */
 301                if (hfsmp->hfc_stage == HFC_RECORDING) {
 302                        fp->ff_bytesread = 0;
 303                }
 304                retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
 305                                0, eflags, &actualBytesAdded));
 306
 307                hfs_systemfile_unlock(hfsmp, lockflags);
 308
 309                if ((actualBytesAdded == 0) && (retval == E_NONE))
 310                        retval = ENOSPC;
 311                if (retval != E_NONE)
 312                        break;
 313                filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 314                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
 315                        (int)offset, uio_resid(uio), (int)fp->ff_size,  (int)filebytes, 0);
 316        }
 317        (void) hfs_update(vp, TRUE);
 318        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
 319        (void) hfs_end_transaction(hfsmp);
 320
 321sizeok:
 322        if (retval == E_NONE) {
 323                off_t filesize;
 324                off_t zero_off;
 325                off_t tail_off;
 326                off_t inval_start;
 327                off_t inval_end;
 328                off_t io_start;
 329                int lflag;
 330                struct rl_entry *invalid_range;
 331
 332                if (writelimit > fp->ff_size)
 333                        filesize = writelimit;
 334                else
 335                        filesize = fp->ff_size;
 336
 337                lflag = (ioflag & IO_SYNC);
 338
 339                if (offset <= fp->ff_size) {
 340                        zero_off = offset & ~PAGE_MASK_64;
 341                        
 342                        /* Check to see whether the area between the zero_offset and the start
 343                           of the transfer to see whether is invalid and should be zero-filled
 344                           as part of the transfer:
 345                         */
 346                        if (offset > zero_off) {
 347                                if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
 348                                        lflag |= IO_HEADZEROFILL;
 349                        }
 350                } else {
 351                        off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
 352                        
 353                        /* The bytes between fp->ff_size and uio->uio_offset must never be
 354                           read without being zeroed.  The current last block is filled with zeroes
 355                           if it holds valid data but in all cases merely do a little bookkeeping
 356                           to track the area from the end of the current last page to the start of
 357                           the area actually written.  For the same reason only the bytes up to the
 358                           start of the page where this write will start is invalidated; any remainder
 359                           before uio->uio_offset is explicitly zeroed as part of the cluster_write.
 360                           
 361                           Note that inval_start, the start of the page after the current EOF,
 362                           may be past the start of the write, in which case the zeroing
 363                           will be handled by the cluser_write of the actual data.
 364                         */
 365                        inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 366                        inval_end = offset & ~PAGE_MASK_64;
 367                        zero_off = fp->ff_size;
 368                        
 369                        if ((fp->ff_size & PAGE_MASK_64) &&
 370                                (rl_scan(&fp->ff_invalidranges,
 371                                                        eof_page_base,
 372                                                        fp->ff_size - 1,
 373                                                        &invalid_range) != RL_NOOVERLAP)) {
 374                                /* The page containing the EOF is not valid, so the
 375                                   entire page must be made inaccessible now.  If the write
 376                                   starts on a page beyond the page containing the eof
 377                                   (inval_end > eof_page_base), add the
 378                                   whole page to the range to be invalidated.  Otherwise
 379                                   (i.e. if the write starts on the same page), zero-fill
 380                                   the entire page explicitly now:
 381                                 */
 382                                if (inval_end > eof_page_base) {
 383                                        inval_start = eof_page_base;
 384                                } else {
 385                                        zero_off = eof_page_base;
 386                                };
 387                        };
 388                        
 389                        if (inval_start < inval_end) {
 390                                struct timeval tv;
 391                                /* There's some range of data that's going to be marked invalid */
 392                                
 393                                if (zero_off < inval_start) {
 394                                        /* The pages between inval_start and inval_end are going to be invalidated,
 395                                           and the actual write will start on a page past inval_end.  Now's the last
 396                                           chance to zero-fill the page containing the EOF:
 397                                         */
 398                                        hfs_unlock(cp);
 399                                        cnode_locked = 0;
 400                                        retval = cluster_write(vp, (uio_t) 0,
 401                                                        fp->ff_size, inval_start,
 402                                                        zero_off, (off_t)0,
 403                                                        lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
 404                                        hfs_lock(cp, HFS_FORCE_LOCK);
 405                                        cnode_locked = 1;
 406                                        if (retval) goto ioerr_exit;
 407                                        offset = uio_offset(uio);
 408                                };
 409                                
 410                                /* Mark the remaining area of the newly allocated space as invalid: */
 411                                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
 412                                microuptime(&tv);
 413                                cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
 414                                zero_off = fp->ff_size = inval_end;
 415                        };
 416                        
 417                        if (offset > zero_off) lflag |= IO_HEADZEROFILL;
 418                };
 419
 420                /* Check to see whether the area between the end of the write and the end of
 421                   the page it falls in is invalid and should be zero-filled as part of the transfer:
 422                 */
 423                tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 424                if (tail_off > filesize) tail_off = filesize;
 425                if (tail_off > writelimit) {
 426                        if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
 427                                lflag |= IO_TAILZEROFILL;
 428                        };
 429                };
 430                
 431                /*
 432                 * if the write starts beyond the current EOF (possibly advanced in the
 433                 * zeroing of the last block, above), then we'll zero fill from the current EOF
 434                 * to where the write begins:
 435                 *
 436                 * NOTE: If (and ONLY if) the portion of the file about to be written is
 437                 *       before the current EOF it might be marked as invalid now and must be
 438                 *       made readable (removed from the invalid ranges) before cluster_write
 439                 *       tries to write it:
 440                 */
 441                io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
 442                if (io_start < fp->ff_size) {
 443                        off_t io_end;
 444
 445                        io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
 446                        rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
 447                };
 448
 449                hfs_unlock(cp);
 450                cnode_locked = 0;
 451                retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
 452                                tail_off, lflag | IO_NOZERODIRTY);
 453                offset = uio_offset(uio);
 454                if (offset > fp->ff_size) {
 455                        fp->ff_size = offset;
 456
 457                        ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
 458                        /* Files that are changing size are not hot file candidates. */
 459                        if (hfsmp->hfc_stage == HFC_RECORDING)
 460                                fp->ff_bytesread = 0;
 461                }
 462                if (resid > uio_resid(uio)) {
 463                        cp->c_touch_chgtime = TRUE;
 464                        cp->c_touch_modtime = TRUE;
 465                }
 466        }
 467        HFS_KNOTE(vp, NOTE_WRITE);
 468
 469ioerr_exit:
 470        /*
 471         * If we successfully wrote any data, and we are not the superuser
 472         * we clear the setuid and setgid bits as a precaution against
 473         * tampering.
 474         */
 475        if (cp->c_mode & (S_ISUID | S_ISGID)) {
 476                cred = vfs_context_ucred(ap->a_context);
 477                if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
 478                        if (!cnode_locked) {
 479                                hfs_lock(cp, HFS_FORCE_LOCK);
 480                                cnode_locked = 1;
 481                        }
 482                        cp->c_mode &= ~(S_ISUID | S_ISGID);
 483                }
 484        }
 485        if (retval) {
 486                if (ioflag & IO_UNIT) {
 487                        if (!cnode_locked) {
 488                                hfs_lock(cp, HFS_FORCE_LOCK);
 489                                cnode_locked = 1;
 490                        }
 491                        (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
 492                                           0, ap->a_context);
 493                        // LP64todo - fix this!  resid needs to by user_ssize_t
 494                        uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
 495                        uio_setresid(uio, resid);
 496                        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 497                }
 498        } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
 499                if (!cnode_locked) {
 500                        hfs_lock(cp, HFS_FORCE_LOCK);
 501                        cnode_locked = 1;
 502                }
 503                retval = hfs_update(vp, TRUE);
 504        }
 505        /* Updating vcbWrCnt doesn't need to be atomic. */
 506        hfsmp->vcbWrCnt++;
 507
 508        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
 509                (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
 510exit:
 511        if (cnode_locked)
 512                hfs_unlock(cp);
 513        hfs_unlock_truncate(cp);
 514        return (retval);
 515}
 516
 517/* support for the "bulk-access" fcntl */
 518
 519#define CACHE_ELEMS 64
 520#define CACHE_LEVELS 16
 521#define PARENT_IDS_FLAG 0x100
 522
 523/* from hfs_attrlist.c */
 524extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
 525                        mode_t obj_mode, struct mount *mp,
 526                        kauth_cred_t cred, struct proc *p);
 527
 528/* from vfs/vfs_fsevents.c */
 529extern char *get_pathbuff(void);
 530extern void release_pathbuff(char *buff);
 531
 532struct access_cache {
 533       int numcached;
 534       int cachehits; /* these two for statistics gathering */
 535       int lookups;
 536       unsigned int *acache;
 537       Boolean *haveaccess;
 538};
 539
 540struct access_t {
 541        uid_t     uid;              /* IN: effective user id */
 542        short     flags;            /* IN: access requested (i.e. R_OK) */
 543        short     num_groups;       /* IN: number of groups user belongs to */
 544        int       num_files;        /* IN: number of files to process */
 545        int       *file_ids;        /* IN: array of file ids */
 546        gid_t     *groups;          /* IN: array of groups */
 547        short     *access;          /* OUT: access info for each file (0 for 'has access') */
 548};
 549
 550struct user_access_t {
 551        uid_t           uid;                    /* IN: effective user id */
 552        short           flags;                  /* IN: access requested (i.e. R_OK) */
 553        short           num_groups;             /* IN: number of groups user belongs to */
 554        int                     num_files;              /* IN: number of files to process */
 555        user_addr_t     file_ids;               /* IN: array of file ids */
 556        user_addr_t     groups;                 /* IN: array of groups */
 557        user_addr_t     access;                 /* OUT: access info for each file (0 for 'has access') */
 558};
 559
 560/*
 561 * Perform a binary search for the given parent_id. Return value is 
 562 * found/not found boolean, and indexp will be the index of the item 
 563 * or the index at which to insert the item if it's not found.
 564 */
 565static int
 566lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
 567{
 568        unsigned int lo, hi;
 569        int index, matches = 0;
 570        
 571        if (cache->numcached == 0) {
 572                *indexp = 0;
 573                return 0; // table is empty, so insert at index=0 and report no match
 574        }
 575        
 576        if (cache->numcached > CACHE_ELEMS) {
 577                /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
 578                  cache->numcached, CACHE_ELEMS);*/
 579                cache->numcached = CACHE_ELEMS;
 580        }
 581        
 582        lo = 0;
 583        hi = cache->numcached - 1;
 584        index = -1;
 585        
 586        /* perform binary search for parent_id */
 587        do {
 588                unsigned int mid = (hi - lo)/2 + lo;
 589                unsigned int this_id = cache->acache[mid];
 590                
 591                if (parent_id == this_id) {
 592                        index = mid;
 593                        break;
 594                }
 595                
 596                if (parent_id < this_id) {
 597                        hi = mid;
 598                        continue;
 599                }
 600                
 601                if (parent_id > this_id) {
 602                        lo = mid + 1;
 603                        continue;
 604                }
 605        } while(lo < hi);
 606        
 607        /* check if lo and hi converged on the match */
 608        if (parent_id == cache->acache[hi]) {
 609                index = hi;
 610        }
 611        
 612        /* if no existing entry found, find index for new one */
 613        if (index == -1) {
 614                index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
 615                matches = 0;
 616        } else {
 617                matches = 1;
 618        }
 619        
 620        *indexp = index;
 621        return matches;
 622}
 623
 624/*
 625 * Add a node to the access_cache at the given index (or do a lookup first
 626 * to find the index if -1 is passed in). We currently do a replace rather
 627 * than an insert if the cache is full.
 628 */
 629static void
 630add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
 631{
 632       int lookup_index = -1;
 633
 634       /* need to do a lookup first if -1 passed for index */
 635       if (index == -1) {
 636               if (lookup_bucket(cache, &lookup_index, nodeID)) {
 637                       if (cache->haveaccess[lookup_index] != access) {
 638                               /* change access info for existing entry... should never happen */
 639                               cache->haveaccess[lookup_index] = access;
 640                       }
 641
 642                       /* mission accomplished */
 643                       return;
 644               } else {
 645                       index = lookup_index;
 646               }
 647
 648       }
 649
 650       /* if the cache is full, do a replace rather than an insert */
 651       if (cache->numcached >= CACHE_ELEMS) {
 652               //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
 653               cache->numcached = CACHE_ELEMS-1;
 654
 655               if (index > cache->numcached) {
 656                 //    printf("index %d pinned to %d\n", index, cache->numcached);
 657                       index = cache->numcached;
 658               }
 659       } else if (index >= 0 && index < cache->numcached) {
 660               /* only do bcopy if we're inserting */
 661               bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
 662               bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
 663       }
 664
 665       cache->acache[index] = nodeID;
 666       cache->haveaccess[index] = access;
 667       cache->numcached++;
 668}
 669
 670
 671struct cinfo {
 672        uid_t   uid;
 673        gid_t   gid;
 674        mode_t  mode;
 675        cnid_t  parentcnid;
 676};
 677
 678static int
 679snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
 680{
 681        struct cinfo *cip = (struct cinfo *)arg;
 682
 683        cip->uid = attrp->ca_uid;
 684        cip->gid = attrp->ca_gid;
 685        cip->mode = attrp->ca_mode;
 686        cip->parentcnid = descp->cd_parentcnid;
 687        
 688        return (0);
 689}
 690
 691/*
 692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 693 * isn't incore, then go to the catalog.
 694 */ 
 695static int
 696do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid, 
 697               struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
 698{
 699        int error = 0;
 700
 701        /* if this id matches the one the fsctl was called with, skip the lookup */
 702        if (cnid == skip_cp->c_cnid) {
 703                cnattrp->ca_uid = skip_cp->c_uid;
 704                cnattrp->ca_gid = skip_cp->c_gid;
 705                cnattrp->ca_mode = skip_cp->c_mode;
 706                keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
 707        } else {
 708                struct cinfo c_info;
 709
 710                /* otherwise, check the cnode hash incase the file/dir is incore */
 711                if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
 712                        cnattrp->ca_uid = c_info.uid;
 713                        cnattrp->ca_gid = c_info.gid;
 714                        cnattrp->ca_mode = c_info.mode;
 715                        keyp->hfsPlus.parentID = c_info.parentcnid;
 716                } else {
 717                        int lockflags;
 718                        
 719                        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 720                        
 721                        /* lookup this cnid in the catalog */
 722                        error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
 723                        
 724                        hfs_systemfile_unlock(hfsmp, lockflags);
 725                        
 726                        cache->lookups++;
 727                }
 728        }
 729        
 730        return (error);
 731}
 732
 733/*
 734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 735 * up to CACHE_LEVELS as we progress towards the root.
 736 */
 737static int 
 738do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID, 
 739                struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
 740{
 741       int                     myErr = 0;
 742       int                     myResult;
 743       HFSCatalogNodeID        thisNodeID;
 744       unsigned long           myPerms;
 745       struct cat_attr         cnattr;
 746       int                     cache_index = -1;
 747       CatalogKey              catkey;
 748
 749       int i = 0, ids_to_cache = 0;
 750       int parent_ids[CACHE_LEVELS];
 751
 752       /* root always has access */
 753       if (!suser(myp_ucred, NULL)) {
 754               return (1);
 755       }
 756
 757       thisNodeID = nodeID;
 758       while (thisNodeID >=  kRootDirID) {
 759               myResult = 0;   /* default to "no access" */
 760       
 761               /* check the cache before resorting to hitting the catalog */
 762
 763               /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
 764                * to look any further after hitting cached dir */
 765
 766               if (lookup_bucket(cache, &cache_index, thisNodeID)) {
 767                       cache->cachehits++;
 768                       myResult = cache->haveaccess[cache_index];
 769                       goto ExitThisRoutine;
 770               }
 771
 772               /* remember which parents we want to cache */
 773               if (ids_to_cache < CACHE_LEVELS) {
 774                       parent_ids[ids_to_cache] = thisNodeID;
 775                       ids_to_cache++;
 776               }
 777               
 778               /* do the lookup (checks the cnode hash, then the catalog) */
 779               myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
 780               if (myErr) {
 781                       goto ExitThisRoutine; /* no access */
 782               }
 783
 784               myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
 785                                                 cnattr.ca_mode, hfsmp->hfs_mp,
 786                                                 myp_ucred, theProcPtr);
 787
 788               if ( (myPerms & X_OK) == 0 ) {
 789                       myResult = 0;
 790                       goto ExitThisRoutine;   /* no access */
 791               } 
 792
 793               /* up the hierarchy we go */
 794               thisNodeID = catkey.hfsPlus.parentID;
 795       }
 796
 797       /* if here, we have access to this node */
 798       myResult = 1;
 799
 800 ExitThisRoutine:
 801       if (myErr) {
 802               //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
 803               myResult = 0;
 804       }
 805       *err = myErr;
 806
 807       /* cache the parent directory(ies) */
 808       for (i = 0; i < ids_to_cache; i++) {
 809               /* small optimization: get rid of double-lookup for all these */
 810               // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
 811               add_node(cache, -1, parent_ids[i], myResult);
 812       }
 813
 814       return (myResult);
 815}
 816/* end "bulk-access" support */
 817
 818
 819
 /*
  * Per-vnode callback handed to vnode_iterate() by the F_FREEZE_FS ioctl.
  *
  * Calls vnode_waitforwrites() on the vnode with the wait message "hfs freeze".
  * The iteration argument (cargs) is ignored.  Always returns 0.
  */
static int
hfs_freezewrite_callback(struct vnode *vp, void *cargs)
{
	(void)cargs;	/* unused */

	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
	return (0);
}
 830
 831/*
 832 * Control filesystem operating characteristics.
 833 */
 834int
 835hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 836                vnode_t a_vp;
 837                int  a_command;
 838                caddr_t  a_data;
 839                int  a_fflag;
 840                vfs_context_t a_context;
 841        } */ *ap)
 842{
 843        struct vnode * vp = ap->a_vp;
 844        struct hfsmount *hfsmp = VTOHFS(vp);
 845        vfs_context_t context = ap->a_context;
 846        kauth_cred_t cred = vfs_context_ucred(context);
 847        proc_t p = vfs_context_proc(context);
 848        struct vfsstatfs *vfsp;
 849        boolean_t is64bit;
 850
 851        is64bit = proc_is64bit(p);
 852
 853        switch (ap->a_command) {
 854
 855        case HFS_RESIZE_VOLUME: {
 856                u_int64_t newsize;
 857                u_int64_t cursize;
 858
 859                vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 860                if (suser(cred, NULL) &&
 861                        kauth_cred_getuid(cred) != vfsp->f_owner) {
 862                        return (EACCES); /* must be owner of file system */
 863                }
 864                if (!vnode_isvroot(vp)) {
 865                        return (EINVAL);
 866                }
 867                newsize = *(u_int64_t *)ap->a_data;
 868                cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
 869                
 870                if (newsize > cursize) {
 871                        return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
 872                } else if (newsize < cursize) {
 873                        return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
 874                } else {
 875                        return (0);
 876                }
 877        }
 878        case HFS_CHANGE_NEXT_ALLOCATION: {
 879                u_int32_t location;
 880
 881                if (vnode_vfsisrdonly(vp)) {
 882                        return (EROFS);
 883                }
 884                vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 885                if (suser(cred, NULL) &&
 886                        kauth_cred_getuid(cred) != vfsp->f_owner) {
 887                        return (EACCES); /* must be owner of file system */
 888                }
 889                if (!vnode_isvroot(vp)) {
 890                        return (EINVAL);
 891                }
 892                location = *(u_int32_t *)ap->a_data;
 893                if (location > hfsmp->totalBlocks - 1) {
 894                        return (EINVAL);
 895                }
 896                /* Return previous value. */
 897                *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
 898                HFS_MOUNT_LOCK(hfsmp, TRUE);
 899                hfsmp->nextAllocation = location;
 900                hfsmp->vcbFlags |= 0xFF00;
 901                HFS_MOUNT_UNLOCK(hfsmp, TRUE);
 902                return (0);
 903        }
 904
 905#ifdef HFS_SPARSE_DEV
 906        case HFS_SETBACKINGSTOREINFO: {
 907                struct vnode * bsfs_rootvp;
 908                struct vnode * di_vp;
 909                struct hfs_backingstoreinfo *bsdata;
 910                int error = 0;
 911                
 912                if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
 913                        return (EALREADY);
 914                }
 915                vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 916                if (suser(cred, NULL) &&
 917                        kauth_cred_getuid(cred) != vfsp->f_owner) {
 918                        return (EACCES); /* must be owner of file system */
 919                }
 920                bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
 921                if (bsdata == NULL) {
 922                        return (EINVAL);
 923                }
 924                if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
 925                        return (error);
 926                }
 927                if ((error = vnode_getwithref(di_vp))) {
 928                        file_drop(bsdata->backingfd);
 929                        return(error);
 930                }
 931
 932                if (vnode_mount(vp) == vnode_mount(di_vp)) {
 933                        (void)vnode_put(di_vp);
 934                        file_drop(bsdata->backingfd);
 935                        return (EINVAL);
 936                }
 937
 938                /*
 939                 * Obtain the backing fs root vnode and keep a reference
 940                 * on it.  This reference will be dropped in hfs_unmount.
 941                 */
 942                error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
 943                if (error) {
 944                        (void)vnode_put(di_vp);
 945                        file_drop(bsdata->backingfd);
 946                        return (error);
 947                }
 948                vnode_ref(bsfs_rootvp);
 949                vnode_put(bsfs_rootvp);
 950
 951                hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
 952                hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
 953                hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
 954                hfsmp->hfs_sparsebandblks *= 4;
 955
 956                (void)vnode_put(di_vp);
 957                file_drop(bsdata->backingfd);
 958                return (0);
 959        }
 960        case HFS_CLRBACKINGSTOREINFO: {
 961                struct vnode * tmpvp;
 962
 963                vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 964                if (suser(cred, NULL) &&
 965                        kauth_cred_getuid(cred) != vfsp->f_owner) {
 966                        return (EACCES); /* must be owner of file system */
 967                }
 968                if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
 969                    hfsmp->hfs_backingfs_rootvp) {
 970
 971                        hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
 972                        tmpvp = hfsmp->hfs_backingfs_rootvp;
 973                        hfsmp->hfs_backingfs_rootvp = NULLVP;
 974                        hfsmp->hfs_sparsebandblks = 0;
 975                        vnode_rele(tmpvp);
 976                }
 977                return (0);
 978        }
 979#endif /* HFS_SPARSE_DEV */
 980
 981        case F_FREEZE_FS: {
 982                struct mount *mp;
 983                task_t task;
 984 
 985                if (!is_suser())
 986                        return (EACCES);
 987
 988                mp = vnode_mount(vp);
 989                hfsmp = VFSTOHFS(mp);
 990
 991                if (!(hfsmp->jnl))
 992                        return (ENOTSUP);
 993
 994                lck_rw_lock_exclusive(&hfsmp->hfs_insync);
 995 
 996                task = current_task();
 997                task_working_set_disable(task);
 998
 999                // flush things before we get started to try and prevent
1000                // dirty data from being paged out while we're frozen.
1001                // note: can't do this after taking the lock as it will
1002                // deadlock against ourselves.
1003                vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1004                hfs_global_exclusive_lock_acquire(hfsmp);
1005                journal_flush(hfsmp->jnl);
1006
1007                // don't need to iterate on all vnodes, we just need to
1008                // wait for writes to the system files and the device vnode
1009                if (HFSTOVCB(hfsmp)->extentsRefNum)
1010                    vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1011                if (HFSTOVCB(hfsmp)->catalogRefNum)
1012                    vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1013                if (HFSTOVCB(hfsmp)->allocationsRefNum)
1014                    vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1015                if (hfsmp->hfs_attribute_vp)
1016                    vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1017                vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1018
1019                hfsmp->hfs_freezing_proc = current_proc();
1020
1021                return (0);
1022        }
1023
1024        case F_THAW_FS: {
1025                if (!is_suser())
1026                        return (EACCES);
1027
1028                // if we're not the one who froze the fs then we
1029                // can't thaw it.
1030                if (hfsmp->hfs_freezing_proc != current_proc()) {
1031                    return EPERM;
1032                }
1033
1034                // NOTE: if you add code here, also go check the
1035                //       code that "thaws" the fs in hfs_vnop_close()
1036                //
1037                hfsmp->hfs_freezing_proc = NULL;
1038                hfs_global_exclusive_lock_release(hfsmp);
1039                lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1040
1041                return (0);
1042        }
1043
1044#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1045#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1046
1047        case HFS_BULKACCESS_FSCTL:
1048        case HFS_BULKACCESS: {
1049                /*
1050                 * NOTE: on entry, the vnode is locked. Incase this vnode
1051                 * happens to be in our list of file_ids, we'll note it
1052                 * avoid calling hfs_chashget_nowait() on that id as that
1053                 * will cause a "locking against myself" panic.
1054                 */
1055                Boolean check_leaf = true;
1056                
1057                struct user_access_t *user_access_structp;
1058                struct user_access_t tmp_user_access_t;
1059                struct access_cache cache;
1060                
1061                int error = 0, i;
1062                
1063                dev_t dev = VTOC(vp)->c_dev;
1064                
1065                short flags;
1066                struct ucred myucred;   /* XXX ILLEGAL */
1067                int num_files;
1068                int *file_ids = NULL;
1069                short *access = NULL;
1070                
1071                cnid_t cnid;
1072                cnid_t prevParent_cnid = 0;
1073                unsigned long myPerms;
1074                short myaccess = 0;
1075                struct cat_attr cnattr;
1076                CatalogKey catkey;
1077                struct cnode *skip_cp = VTOC(vp);
1078                struct vfs_context      my_context;
1079
1080                /* first, return error if not run as root */
1081                if (cred->cr_ruid != 0) {
1082                        return EPERM;
1083                }
1084                
1085                /* initialize the local cache and buffers */
1086                cache.numcached = 0;
1087                cache.cachehits = 0;
1088                cache.lookups = 0;
1089                
1090                file_ids = (int *) get_pathbuff();
1091                access = (short *) get_pathbuff();
1092                cache.acache = (int *) get_pathbuff();
1093                cache.haveaccess = (Boolean *) get_pathbuff();
1094                
1095                if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1096                        release_pathbuff((char *) file_ids);
1097                        release_pathbuff((char *) access);
1098                        release_pathbuff((char *) cache.acache);
1099                        release_pathbuff((char *) cache.haveaccess);
1100                        
1101                        return ENOMEM;
1102                }
1103                
1104                /* struct copyin done during dispatch... need to copy file_id array separately */
1105                if (ap->a_data == NULL) {
1106                        error = EINVAL;
1107                        goto err_exit_bulk_access;
1108                }
1109
1110                if (is64bit) {
1111                        user_access_structp = (struct user_access_t *)ap->a_data;
1112                }
1113                else {
1114                        struct access_t *       accessp = (struct access_t *)ap->a_data;
1115                        tmp_user_access_t.uid = accessp->uid;
1116                        tmp_user_access_t.flags = accessp->flags;
1117                        tmp_user_access_t.num_groups = accessp->num_groups;
1118                        tmp_user_access_t.num_files = accessp->num_files;
1119                        tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1120                        tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1121                        tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1122                        user_access_structp = &tmp_user_access_t;
1123                }
1124                
1125                num_files = user_access_structp->num_files;
1126                if (num_files < 1) {
1127                        goto err_exit_bulk_access;
1128                }
1129                if (num_files > 256) {
1130                        error = EINVAL;
1131                        goto err_exit_bulk_access;
1132                }
1133                
1134                if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1135                                                        num_files * sizeof(int)))) {
1136                        goto err_exit_bulk_access;
1137                }
1138                
1139                /* fill in the ucred structure */
1140                flags = user_access_structp->flags;
1141                if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1142                        flags = R_OK;
1143                }
1144                
1145                /* check if we've been passed leaf node ids or parent ids */
1146                if (flags & PARENT_IDS_FLAG) {
1147                        check_leaf = false;
1148                }
1149                
1150                memset(&myucred, 0, sizeof(myucred));
1151                myucred.cr_ref = 1;
1152                myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1153                myucred.cr_ngroups = user_access_structp->num_groups;
1154                if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1155                        myucred.cr_ngroups = 0;
1156                } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1157                                          myucred.cr_ngroups * sizeof(gid_t)))) {
1158                        goto err_exit_bulk_access;
1159                }
1160                myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1161                myucred.cr_gmuid = myucred.cr_uid;
1162                
1163                my_context.vc_proc = p;
1164                my_context.vc_ucred = &myucred;
1165
1166                /* Check access to each file_id passed in */
1167                for (i = 0; i < num_files; i++) {
1168#if 0
1169                        cnid = (cnid_t) file_ids[i];
1170                        
1171                        /* root always has access */
1172                        if (!suser(&myucred, NULL)) {
1173                                access[i] = 0;
1174                                continue;
1175                        }
1176                        
1177                        if (check_leaf) {
1178                                
1179                                /* do the lookup (checks the cnode hash, then the catalog) */
1180                                error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1181                                if (error) {
1182                                        access[i] = (short) error;
1183                                        continue;
1184                                }
1185                                                        
1186                                /* before calling CheckAccess(), check the target file for read access */
1187                                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1188                                                                  cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p  );
1189                                
1190                                
1191                                /* fail fast if no access */ 
1192                                if ((myPerms & flags) == 0) {
1193                                        access[i] = EACCES;
1194                                        continue;
1195                                }
1196                        } else {
1197                                /* we were passed an array of parent ids */
1198                                catkey.hfsPlus.parentID = cnid;
1199                        }
1200                        
1201                        /* if the last guy had the same parent and had access, we're done */
1202                        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1203                                cache.cachehits++;
1204                                access[i] = 0;
1205                                continue;
1206                        }
1207                        
1208                        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID, 
1209                                                   skip_cp, p, &myucred, dev);
1210                        
1211                        if ( myaccess ) {
1212                                access[i] = 0; // have access.. no errors to report
1213                        } else {
1214                                access[i] = (error != 0 ? (short) error : EACCES);
1215                        }
1216                        
1217                        prevParent_cnid = catkey.hfsPlus.parentID;
1218#else
1219                        int myErr;
1220                        
1221                        cnid = (cnid_t)file_ids[i];
1222                        
1223                        while (cnid >= kRootDirID) {
1224                            /* get the vnode for this cnid */
1225                            myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1226                            if ( myErr ) {
1227                                access[i] = EACCES;
1228                                break;
1229                            }
1230
1231                            cnid = VTOC(vp)->c_parentcnid;
1232
1233                            hfs_unlock(VTOC(vp));
1234                            if (vnode_vtype(vp) == VDIR) {
1235                                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1236                            } else {
1237                                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1238                            }
1239                            vnode_put(vp);
1240                            access[i] = myErr;
1241                            if (myErr) {
1242                                break;
1243                            }
1244                        }
1245#endif                  
1246                }
1247                
1248                /* copyout the access array */
1249                if ((error = copyout((caddr_t)access, user_access_structp->access, 
1250                                     num_files * sizeof (short)))) {
1251                        goto err_exit_bulk_access;
1252                }
1253                
1254        err_exit_bulk_access:
1255                
1256                //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1257                
1258                release_pathbuff((char *) cache.acache);
1259                release_pathbuff((char *) cache.haveaccess);
1260                release_pathbuff((char *) file_ids);
1261                release_pathbuff((char *) access);
1262                
1263                return (error);
1264        } /* HFS_BULKACCESS */
1265
1266        case HFS_SETACLSTATE: {
1267                int state;
1268
1269                if (ap->a_data == NULL) {
1270                        return (EINVAL);
1271                }
1272
1273                vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1274                state = *(int *)ap->a_data;
1275
1276                // super-user can enable or disable acl's on a volume.
1277                // the volume owner can only enable acl's
1278                if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1279                        return (EPERM);
1280                }
1281                if (state == 0 || state == 1)
1282                        return hfs_setextendedsecurity(hfsmp, state);
1283                else
1284                        return (EINVAL);        
1285        }
1286
1287        case F_FULLFSYNC: {
1288                int error;
1289
1290                error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1291                if (error == 0) {
1292                        error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1293                        hfs_unlock(VTOC(vp));
1294                }
1295
1296                return error;
1297        }
1298
1299        case F_CHKCLEAN: {
1300                register struct cnode *cp;
1301                int error;
1302
1303                if (!vnode_isreg(vp))
1304                        return EINVAL;
1305 
1306                error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1307                if (error == 0) {
1308                        cp = VTOC(vp);
1309                        /*
1310                         * used by regression test to determine if 
1311                         * all the dirty pages (via write) have been cleaned
1312                         * after a call to 'fsysnc'.
1313                         */
1314                        error = is_file_clean(vp, VTOF(vp)->ff_size);
1315                        hfs_unlock(cp);
1316                }
1317                return (error);
1318        }
1319
1320        case F_RDADVISE: {
1321                register struct radvisory *ra;
1322                struct filefork *fp;
1323                int error;
1324
1325                if (!vnode_isreg(vp))
1326                        return EINVAL;
1327 
1328                ra = (struct radvisory *)(ap->a_data);
1329                fp = VTOF(vp);
1330
1331                /* Protect against a size change. */
1332                hfs_lock_truncate(VTOC(vp), TRUE);
1333
1334                if (ra->ra_offset >= fp->ff_size) {
1335                        error = EFBIG;
1336                } else {
1337                        error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1338                }
1339
1340                hfs_unlock_truncate(VTOC(vp));
1341                return (error);
1342        }
1343
1344        case F_READBOOTSTRAP:
1345        case F_WRITEBOOTSTRAP:
1346        {
1347            struct vnode *devvp = NULL;
1348            user_fbootstraptransfer_t *user_bootstrapp;
1349            int devBlockSize;
1350            int error;
1351            uio_t auio;
1352            daddr64_t blockNumber;
1353            u_long blockOffset;
1354            u_long xfersize;
1355            struct buf *bp;
1356            user_fbootstraptransfer_t user_bootstrap;
1357
1358                if (!vnode_isvroot(vp))
1359                        return (EINVAL);
1360                /* LP64 - when caller is a 64 bit process then we are passed a pointer 
1361                 * to a user_fbootstraptransfer_t else we get a pointer to a 
1362                 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1363                 */
1364                if (is64bit) {
1365                        user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1366                }
1367                else {
1368                fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1369                        user_bootstrapp = &user_bootstrap;
1370                        user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1371                        user_bootstrap.fbt_length = bootstrapp->fbt_length;
1372                        user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1373                }
1374                if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024) 
1375                        return EINVAL;
1376            
1377            devvp = VTOHFS(vp)->hfs_devvp;
1378                auio = uio_create(1, user_bootstrapp->fbt_offset, 
1379                                                  is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1380                                                  (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1381                uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1382
1383            devBlockSize = vfs_devblocksize(vnode_mount(vp));
1384
1385            while (uio_resid(auio) > 0) {
1386                        blockNumber = uio_offset(auio) / devBlockSize;
1387                        error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1388                        if (error) {
1389                                if (bp) buf_brelse(bp);
1390                                uio_free(auio);
1391                                return error;
1392                        };
1393
1394                        blockOffset = uio_offset(auio) % devBlockSize;
1395                        xfersize = devBlockSize - blockOffset;
1396                        error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1397                        if (error) {
1398                                buf_brelse(bp);
1399                                uio_free(auio);
1400                                return error;
1401                        };
1402                        if (uio_rw(auio) == UIO_WRITE) {
1403                                error = VNOP_BWRITE(bp);
1404                                if (error) {
1405                                        uio_free(auio);
1406                        return error;
1407                                }
1408                        } else {
1409                                buf_brelse(bp);
1410                        };
1411                };
1412                uio_free(auio);
1413        };
1414        return 0;
1415
1416        case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
1417        {
1418                if (is64bit) {
1419                        *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1420                }
1421                else {
1422                        *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1423                }
1424                return 0;
1425        }
1426
1427        case HFS_GET_MOUNT_TIME:
1428            return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1429            break;
1430
1431        case HFS_GET_LAST_MTIME:
1432            return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1433            break;
1434
1435        case HFS_SET_BOOT_INFO:
1436                if (!vnode_isvroot(vp))
1437                        return(EINVAL);
1438                if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1439                        return(EACCES); /* must be superuser or owner of filesystem */
1440                HFS_MOUNT_LOCK(hfsmp, TRUE);
1441                bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1442                HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1443                (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1444                break;
1445                
1446        case HFS_GET_BOOT_INFO:
1447                if (!vnode_isvroot(vp))
1448                        return(EINVAL);
1449                HFS_MOUNT_LOCK(hfsmp, TRUE);
1450                bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1451                HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1452                break;
1453
1454        default:
1455                return (ENOTTY);
1456        }
1457
1458    /* Should never get here */
1459        return 0;
1460}
1461
1462/*
1463 * select
1464 */
1465int
1466hfs_vnop_select(__unused struct vnop_select_args *ap)
1467/*
1468        struct vnop_select_args {
1469                vnode_t a_vp;
1470                int  a_which;
1471                int  a_fflags;
1472                void *a_wql;
1473                vfs_context_t a_context;
1474        };
1475*/
1476{
1477        /*
1478         * We should really check to see if I/O is possible.
1479         */
1480        return (1);
1481}
1482
1483/*
1484 * Converts a logical block number to a physical block, and optionally returns
1485 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1486 * The physical block number is based on the device block size, currently its 512.
1487 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1488 */
1489int
1490hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1491{
1492        struct cnode *cp = VTOC(vp);
1493        struct filefork *fp = VTOF(vp);
1494        struct hfsmount *hfsmp = VTOHFS(vp);
1495        int  retval = E_NONE;
1496        daddr_t  logBlockSize;
1497        size_t  bytesContAvail = 0;
1498        off_t  blockposition;
1499        int lockExtBtree;
1500        int lockflags = 0;
1501
1502        /*
1503         * Check for underlying vnode requests and ensure that logical
1504         * to physical mapping is requested.
1505         */
1506        if (vpp != NULL)
1507                *vpp = cp->c_devvp;
1508        if (bnp == NULL)
1509                return (0);
1510
1511        logBlockSize = GetLogicalBlockSize(vp);
1512        blockposition = (off_t)bn * (off_t)logBlockSize;
1513
1514        lockExtBtree = overflow_extents(fp);
1515
1516        if (lockExtBtree)
1517                lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1518
1519        retval = MacToVFSError(
1520                            MapFileBlockC (HFSTOVCB(hfsmp),
1521                                            (FCB*)fp,
1522                                            MAXPHYSIO,
1523                                            blockposition,
1524                                            bnp,
1525                                            &bytesContAvail));
1526
1527        if (lockExtBtree)
1528                hfs_systemfile_unlock(hfsmp, lockflags);
1529
1530        if (retval == E_NONE) {
1531                /* Figure out how many read ahead blocks there are */
1532                if (runp != NULL) {
1533                        if (can_cluster(logBlockSize)) {
1534                                /* Make sure this result never goes negative: */
1535                                *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1536                        } else {
1537                                *runp = 0;
1538                        }
1539                }
1540        }
1541        return (retval);
1542}
1543
1544/*
1545 * Convert logical block number to file offset.
1546 */
1547int
1548hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1549/*
1550        struct vnop_blktooff_args {
1551                vnode_t a_vp;
1552                daddr64_t a_lblkno;  
1553                off_t *a_offset;
1554        };
1555*/
1556{       
1557        if (ap->a_vp == NULL)
1558                return (EINVAL);
1559        *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1560
1561        return(0);
1562}
1563
1564/*
1565 * Convert file offset to logical block number.
1566 */
1567int
1568hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1569/*
1570        struct vnop_offtoblk_args {
1571                vnode_t a_vp;
1572                off_t a_offset;    
1573                daddr64_t *a_lblkno;
1574        };
1575*/
1576{       
1577        if (ap->a_vp == NULL)
1578                return (EINVAL);
1579        *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1580
1581        return(0);
1582}
1583
1584/*
1585 * Map file offset to physical block number.
1586 *
1587 * System file cnodes are expected to be locked (shared or exclusive).
1588 */
1589int
1590hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1591/*
1592        struct vnop_blockmap_args {
1593                vnode_t a_vp;
1594                off_t a_foffset;    
1595                size_t a_size;
1596                daddr64_t *a_bpn;
1597                size_t *a_run;
1598                void *a_poff;
1599                int a_flags;
1600                vfs_context_t a_context;
1601        };
1602*/
1603{
1604        struct vnode *vp = ap->a_vp;
1605        struct cnode *cp;
1606        struct filefork *fp;
1607        struct hfsmount *hfsmp;
1608        size_t bytesContAvail = 0;
1609        int retval = E_NONE;
1610        int syslocks = 0;
1611        int lockflags = 0;
1612        struct rl_entry *invalid_range;
1613        enum rl_overlaptype overlaptype;
1614        int started_tr = 0;
1615        int tooklock = 0;
1616
1617        /* Do not allow blockmap operation on a directory */
1618        if (vnode_isdir(vp)) {
1619                return (ENOTSUP);
1620        }
1621
1622        /*
1623         * Check for underlying vnode requests and ensure that logical
1624         * to physical mapping is requested.
1625         */
1626        if (ap->a_bpn == NULL)
1627                return (0);
1628
1629        if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1630                if (VTOC(vp)->c_lockowner != current_thread()) {
1631                        hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1632                        tooklock = 1;
1633                } else {
1634                        cp = VTOC(vp);
1635                        panic("blockmap: %s cnode lock already held!\n",
1636                                cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1637                }
1638        }
1639        hfsmp = VTOHFS(vp);
1640        cp = VTOC(vp);
1641        fp = VTOF(vp);
1642
1643retry:
1644        if (fp->ff_unallocblocks) {
1645                if (hfs_start_transaction(hfsmp) != 0) {
1646                        retval = EINVAL;
1647                        goto exit;
1648                } else {
1649                        started_tr = 1;
1650                }
1651                syslocks = SFL_EXTENTS | SFL_BITMAP;
1652                
1653        } else if (overflow_extents(fp)) {
1654                syslocks = SFL_EXTENTS;
1655        }
1656        
1657        if (syslocks)
1658                lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1659
1660        /*
1661         * Check for any delayed allocations.
1662         */
1663        if (fp->ff_unallocblocks) {
1664                SInt64 actbytes;
1665                u_int32_t loanedBlocks;
1666
1667                // 
1668                // Make sure we have a transaction.  It's possible
1669                // that we came in and fp->ff_unallocblocks was zero
1670                // but during the time we blocked acquiring the extents
1671                // btree, ff_unallocblocks became non-zero and so we
1672                // will need to start a transaction.
1673                //
1674                if (started_tr == 0) {
1675                        if (syslocks) {
1676                                hfs_systemfile_unlock(hfsmp, lockflags);
1677                                syslocks = 0;
1678                        }
1679                        goto retry;
1680                }
1681
1682                /*
1683                 * Note: ExtendFileC will Release any blocks on loan and
1684                 * aquire real blocks.  So we ask to extend by zero bytes
1685                 * since ExtendFileC will account for the virtual blocks.
1686                 */
1687
1688                loanedBlocks = fp->ff_unallocblocks;
1689                retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1690                                     kEFAllMask | kEFNoClumpMask, &actbytes);
1691
1692                if (retval) {
1693                        fp->ff_unallocblocks = loanedBlocks;
1694                        cp->c_blocks += loanedBlocks;
1695                        fp->ff_blocks += loanedBlocks;
1696
1697                        HFS_MOUNT_LOCK(hfsmp, TRUE);
1698                        hfsmp->loanedBlocks += loanedBlocks;
1699                        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1700                }
1701
1702                if (retval) {
1703                        hfs_systemfile_unlock(hfsmp, lockflags);
1704                        cp->c_flag |= C_MODIFIED;
1705                        if (started_tr) {
1706                                (void) hfs_update(vp, TRUE);
1707                                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1708
1709                                hfs_end_transaction(hfsmp);
1710                        }
1711                        goto exit;
1712                }
1713        }
1714
1715        retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1716                               ap->a_bpn, &bytesContAvail);
1717        if (syslocks) {
1718                hfs_systemfile_unlock(hfsmp, lockflags);
1719                syslocks = 0;
1720        }
1721
1722        if (started_tr) {
1723                (void) hfs_update(vp, TRUE);
1724                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1725                hfs_end_transaction(hfsmp);
1726                started_tr = 0;
1727        }       
1728        if (retval) {
1729                goto exit;
1730        }
1731
1732        /* Adjust the mapping information for invalid file ranges: */
1733        overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1734                              ap->a_foffset + (off_t)bytesContAvail - 1,
1735                              &invalid_range);
1736        if (overlaptype != RL_NOOVERLAP) {
1737                switch(overlaptype) {
1738                case RL_MATCHINGOVERLAP:
1739                case RL_OVERLAPCONTAINSRANGE:
1740                case RL_OVERLAPSTARTSBEFORE:
1741                        /* There's no valid block for this byte offset: */
1742                        *ap->a_bpn = (daddr64_t)-1;
1743                        /* There's no point limiting the amount to be returned
1744                         * if the invalid range that was hit extends all the way 
1745                         * to the EOF (i.e. there's no valid bytes between the
1746                         * end of this range and the file's EOF):
1747                         */
1748                        if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1749                            (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1750                                bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1751                        }
1752                        break;
1753        
1754                case RL_OVERLAPISCONTAINED:
1755                case RL_OVERLAPENDSAFTER:
1756                        /* The range of interest hits an invalid block before the end: */
1757                        if (invalid_range->rl_start == ap->a_foffset) {
1758                                /* There's actually no valid information to be had starting here: */
1759                                *ap->a_bpn = (daddr64_t)-1;
1760                                if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1761                                    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1762                                        bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1763                                }
1764                        } else {
1765                                bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1766                        }
1767                        break;
1768
1769                case RL_NOOVERLAP:
1770                        break;
1771                } /* end switch */
1772                if (bytesContAvail > ap->a_size)
1773                        bytesContAvail = ap->a_size;
1774        }
1775        if (ap->a_run)
1776                *ap->a_run = bytesContAvail;
1777
1778        if (ap->a_poff)
1779                *(int *)ap->a_poff = 0;
1780exit:
1781        if (tooklock)
1782                hfs_unlock(cp);
1783
1784        return (MacToVFSError(retval));
1785}
1786
1787
1788/*
1789 * prepare and issue the I/O
1790 * buf_strategy knows how to deal
1791 * with requests that require 
1792 * fragmented I/Os
1793 */
1794int
1795hfs_vnop_strategy(struct vnop_strategy_args *ap)
1796{
1797        buf_t   bp = ap->a_bp;
1798        vnode_t vp = buf_vnode(bp);
1799        struct cnode *cp = VTOC(vp);
1800
1801        return (buf_strategy(cp->c_devvp, ap));
1802}
1803
1804
1805static int
1806do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1807{
1808        register struct cnode *cp = VTOC(vp);
1809        struct filefork *fp = VTOF(vp);
1810        struct proc *p = vfs_context_proc(context);;
1811        kauth_cred_t cred = vfs_context_ucred(context);
1812        int retval;
1813        off_t bytesToAdd;
1814        off_t actualBytesAdded;
1815        off_t filebytes;
1816        u_int64_t old_filesize;
1817        u_long fileblocks;
1818        int blksize;
1819        struct hfsmount *hfsmp;
1820        int lockflags;
1821
1822        blksize = VTOVCB(vp)->blockSize;
1823        fileblocks = fp->ff_blocks;
1824        filebytes = (off_t)fileblocks * (off_t)blksize;
1825        old_filesize = fp->ff_size;
1826
1827        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1828                 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1829
1830        if (length < 0)
1831                return (EINVAL);
1832
1833        if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1834                return (EFBIG);
1835
1836        hfsmp = VTOHFS(vp);
1837
1838        retval = E_NONE;
1839
1840        /* Files that are changing size are not hot file candidates. */
1841        if (hfsmp->hfc_stage == HFC_RECORDING) {
1842                fp->ff_bytesread = 0;
1843        }
1844
1845        /* 
1846         * We cannot just check if fp->ff_size == length (as an optimization)
1847         * since there may be extra physical blocks that also need truncation.
1848         */
1849#if QUOTA
1850        if ((retval = hfs_getinoquota(cp)))
1851                return(retval);
1852#endif /* QUOTA */
1853
1854        /*
1855         * Lengthen the size of the file. We must ensure that the
1856         * last byte of the file is allocated. Since the smallest
1857         * value of ff_size is 0, length will be at least 1.
1858         */
1859        if (length > (off_t)fp->ff_size) {
1860#if QUOTA
1861                retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1862                                   cred, 0);
1863                if (retval)
1864                        goto Err_Exit;
1865#endif /* QUOTA */
1866                /*
1867                 * If we don't have enough physical space then
1868                 * we need to extend the physical size.
1869                 */
1870                if (length > filebytes) {
1871                        int eflags;
1872                        u_long blockHint = 0;
1873
1874                        /* All or nothing and don't round up to clumpsize. */
1875                        eflags = kEFAllMask | kEFNoClumpMask;
1876
1877                        if (cred && suser(cred, NULL) != 0)
1878                                eflags |= kEFReserveMask;  /* keep a reserve */
1879
1880                        /*
1881                         * Allocate Journal and Quota files in metadata zone.
1882                         */
1883                        if (filebytes == 0 &&
1884                            hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1885                            hfs_virtualmetafile(cp)) {
1886                                eflags |= kEFMetadataMask;
1887                                blockHint = hfsmp->hfs_metazone_start;
1888                        }
1889                        if (hfs_start_transaction(hfsmp) != 0) {
1890                            retval = EINVAL;
1891                            goto Err_Exit;
1892                        }
1893
1894                        /* Protect extents b-tree and allocation bitmap */
1895                        lockflags = SFL_BITMAP;
1896                        if (overflow_extents(fp))
1897                                lockflags |= SFL_EXTENTS;
1898                        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1899
1900                        while ((length > filebytes) && (retval == E_NONE)) {
1901                                bytesToAdd = length - filebytes;
1902                                retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1903                                                    (FCB*)fp,
1904                                                    bytesToAdd,
1905                                                    blockHint,
1906                                                    eflags,
1907                                                    &actualBytesAdded));
1908
1909                                filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1910                                if (actualBytesAdded == 0 && retval == E_NONE) {
1911                                        if (length > filebytes)
1912                                                length = filebytes;
1913                                        break;
1914                                }
1915                        } /* endwhile */
1916
1917                        hfs_systemfile_unlock(hfsmp, lockflags);
1918
1919                        if (hfsmp->jnl) {
1920                            (void) hfs_update(vp, TRUE);
1921                            (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1922                        }
1923
1924                        hfs_end_transaction(hfsmp);
1925
1926                        if (retval)
1927                                goto Err_Exit;
1928
1929                        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1930                                (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1931                }
1932 
1933                if (!(flags & IO_NOZEROFILL)) {
1934                        if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1935                                struct rl_entry *invalid_range;
1936                                off_t zero_limit;
1937                        
1938                                zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1939                                if (length < zero_limit) zero_limit = length;
1940
1941                                if (length > (off_t)fp->ff_size) {
1942                                        struct timeval tv;
1943
1944                                        /* Extending the file: time to fill out the current last page w. zeroes? */
1945                                        if ((fp->ff_size & PAGE_MASK_64) &&
1946                                            (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1947                                            fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1948                                                
1949                                                /* There's some valid data at the start of the (current) last page
1950                                                   of the file, so zero out the remainder of that page to ensure the
1951                                                   entire page contains valid data.  Since there is no invalid range
1952                                                   possible past the (current) eof, there's no need to remove anything
1953                                                   from the invalid range list before calling cluster_write():  */
1954                                                hfs_unlock(cp);
1955                                                retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1956                                                                fp->ff_size, (off_t)0,
1957                                                                (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1958                                                hfs_lock(cp, HFS_FORCE_LOCK);
1959                                                if (retval) goto Err_Exit;
1960                                                
1961                                                /* Merely invalidate the remaining area, if necessary: */
1962                                                if (length > zero_limit) {
1963                                                        microuptime(&tv);
1964                                                        rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1965                                                        cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1966                                                }
1967                                        } else {
1968                                        /* The page containing the (current) eof is invalid: just add the
1969                                           remainder of the page to the invalid list, along with the area
1970                                           being newly allocated:
1971                                         */
1972                                        microuptime(&tv);
1973                                        rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1974                                        cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1975                                        };
1976                                }
1977                        } else {
1978                                        panic("hfs_truncate: invoked on non-UBC object?!");
1979                        };
1980                }
1981                cp->c_touch_modtime = TRUE;
1982                fp->ff_size = length;
1983
1984                /* Nested transactions will do their own ubc_setsize. */
1985                if (!skipsetsize) {
1986                        /*
1987                         * ubc_setsize can cause a pagein here 
1988                         * so we need to drop cnode lock. 
1989                         */
1990                        hfs_unlock(cp);
1991                        ubc_setsize(vp, length);
1992                        hfs_lock(cp, HFS_FORCE_LOCK);
1993                }
1994
1995        } else { /* Shorten the size of the file */
1996
1997                if ((off_t)fp->ff_size > length) {
1998                        /*
1999                         * Any buffers that are past the truncation point need to be
2000                         * invalidated (to maintain buffer cache consistency).
2001                         */
2002
2003                         /* Nested transactions will do their own ubc_setsize. */
2004                         if (!skipsetsize) {
2005                                /*
2006                                 * ubc_setsize can cause a pageout here 
2007                                 * so we need to drop cnode lock. 
2008                                 */
2009                                hfs_unlock(cp);
2010                                ubc_setsize(vp, length);
2011                                hfs_lock(cp, HFS_FORCE_LOCK);
2012                        }
2013            
2014                        /* Any space previously marked as invalid is now irrelevant: */
2015                        rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2016                }
2017
2018                /* 
2019                 * Account for any unmapped blocks. Note that the new
2020                 * file length can still end up with unmapped blocks.
2021                 */
2022                if (fp->ff_unallocblocks > 0) {
2023                        u_int32_t finalblks;
2024                        u_int32_t loanedBlocks;
2025
2026                        HFS_MOUNT_LOCK(hfsmp, TRUE);
2027
2028                        loanedBlocks = fp->ff_unallocblocks;
2029                        cp->c_blocks -= loanedBlocks;
2030                        fp->ff_blocks -= loanedBlocks;
2031                        fp->ff_unallocblocks = 0;
2032
2033                        hfsmp->loanedBlocks -= loanedBlocks;
2034
2035                        finalblks = (length + blksize - 1) / blksize;
2036                        if (finalblks > fp->ff_blocks) {
2037                                /* calculate required unmapped blocks */
2038                                loanedBlocks = finalblks - fp->ff_blocks;
2039                                hfsmp->loanedBlocks += loanedBlocks;
2040
2041                                fp->ff_unallocblocks = loanedBlocks;
2042                                cp->c_blocks += loanedBlocks;
2043                                fp->ff_blocks += loanedBlocks;
2044                        }
2045                        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2046                }
2047
2048                /*
2049                 * For a TBE process the deallocation of the file blocks is
2050                 * delayed until the file is closed.  And hfs_close calls
2051                 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
2052                 * isn't set, we make sure this isn't a TBE process.
2053                 */
2054                if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2055#if QUOTA
2056                  off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2057#endif /* QUOTA */
2058                  if (hfs_start_transaction(hfsmp) != 0) {
2059                      retval = EINVAL;
2060                      goto Err_Exit;
2061                  }
2062
2063                        if (fp->ff_unallocblocks == 0) {
2064                                /* Protect extents b-tree and allocation bitmap */
2065                                lockflags = SFL_BITMAP;
2066                                if (overflow_extents(fp))
2067                                        lockflags |= SFL_EXTENTS;
2068                                lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2069
2070                                retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2071                                                (FCB*)fp, length, false));
2072
2073                                hfs_systemfile_unlock(hfsmp, lockflags);
2074                        }
2075                        if (hfsmp->jnl) {
2076                                if (retval == 0) {
2077                                        fp->ff_size = length;
2078                                }
2079                                (void) hfs_update(vp, TRUE);
2080                                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2081                        }
2082
2083                        hfs_end_transaction(hfsmp);
2084
2085                        filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2086                        if (retval)
2087                                goto Err_Exit;
2088#if QUOTA
2089                        /* These are bytesreleased */
2090                        (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2091#endif /* QUOTA */
2092                }
2093                /* Only set update flag if the logical length changes */
2094                if (old_filesize != length)
2095                        cp->c_touch_modtime = TRUE;
2096                fp->ff_size = length;
2097        }
2098        cp->c_touch_chgtime = TRUE;
2099        retval = hfs_update(vp, MNT_WAIT);
2100        if (retval) {
2101                KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2102                     -1, -1, -1, retval, 0);
2103        }
2104
2105Err_Exit:
2106
2107        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2108                 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2109
2110        return (retval);
2111}
2112
2113
2114
2115/*
2116 * Truncate a cnode to at most length size, freeing (or adding) the
2117 * disk blocks.
2118 */
2119__private_extern__
2120int
2121hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2122             vfs_context_t context)
2123{
2124        struct filefork *fp = VTOF(vp);
2125        off_t filebytes;
2126        u_long fileblocks;
2127        int blksize, error = 0;
2128        struct cnode *cp = VTOC(vp);
2129
2130        if (vnode_isdir(vp))
2131                return (EISDIR);        /* cannot truncate an HFS directory! */
2132
2133        blksize = VTOVCB(vp)->blockSize;
2134        fileblocks = fp->ff_blocks;
2135        filebytes = (off_t)fileblocks * (off_t)blksize;
2136
2137        // have to loop truncating or growing files that are
2138        // really big because otherwise transactions can get
2139        // enormous and consume too many kernel resources.
2140
2141        if (length < filebytes) {
2142                while (filebytes > length) {
2143                        if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2144                                filebytes -= HFS_BIGFILE_SIZE;
2145                        } else {
2146                                filebytes = length;
2147                        }
2148                        cp->c_flag |= C_FORCEUPDATE;
2149                        error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2150                        if (error)
2151                                break;
2152                }
2153        } else if (length > filebytes) {
2154                while (filebytes < length) {
2155                        if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2156                                filebytes += HFS_BIGFILE_SIZE;
2157                        } else {
2158                                filebytes = length;
2159                        }
2160                        cp->c_flag |= C_FORCEUPDATE;
2161                        error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2162                        if (error)
2163                                break;
2164                }
2165        } else /* Same logical size */ {
2166
2167                error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2168        }
2169        /* Files that are changing size are not hot file candidates. */
2170        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2171                fp->ff_bytesread = 0;
2172        }
2173
2174        return (error);
2175}
2176
2177
2178
2179/*
2180 * Preallocate file storage space.
2181 */
2182int
2183hfs_vnop_allocate(struct vnop_allocate_args /* {
2184                vnode_t a_vp;
2185                off_t a_length;
2186                u_int32_t  a_flags;
2187                off_t *a_bytesallocated;
2188                off_t a_offset;
2189                vfs_context_t a_context;
2190        } */ *ap)
2191{
2192        struct vnode *vp = ap->a_vp;
2193        struct cnode *cp;
2194        struct filefork *fp;
2195        ExtendedVCB *vcb;
2196        off_t length = ap->a_length;
2197        off_t startingPEOF;
2198        off_t moreBytesRequested;
2199        off_t actualBytesAdded;
2200        off_t filebytes;
2201        u_long fileblocks;
2202        int retval, retval2;
2203        UInt32 blockHint;
2204        UInt32 extendFlags;   /* For call to ExtendFileC */
2205        struct hfsmount *hfsmp;
2206        kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2207        int lockflags;
2208
2209        *(ap->a_bytesallocated) = 0;
2210
2211        if (!vnode_isreg(vp))
2212                return (EISDIR);
2213        if (length < (off_t)0)
2214                return (EINVAL);
2215
2216        if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2217                return (retval);
2218        cp = VTOC(vp);
2219        fp = VTOF(vp);
2220        hfsmp = VTOHFS(vp);
2221        vcb = VTOVCB(vp);
2222
2223        fileblocks = fp->ff_blocks;
2224        filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2225
2226        if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2227                retval = EINVAL;
2228                goto Err_Exit;
2229        }
2230
2231        /* Fill in the flags word for the call to Extend the file */
2232
2233        extendFlags = kEFNoClumpMask;
2234        if (ap->a_flags & ALLOCATECONTIG) 
2235                extendFlags |= kEFContigMask;
2236        if (ap->a_flags & ALLOCATEALL)
2237                extendFlags |= kEFAllMask;
2238        if (cred && suser(cred, NULL) != 0)
2239                extendFlags |= kEFReserveMask;
2240
2241        retval = E_NONE;
2242        blockHint = 0;
2243        startingPEOF = filebytes;
2244
2245        if (ap->a_flags & ALLOCATEFROMPEOF)
2246                length += filebytes;
2247        else if (ap->a_flags & ALLOCATEFROMVOL)
2248                blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2249
2250        /* If no changes are necesary, then we're done */
2251        if (filebytes == length)
2252                goto Std_Exit;
2253
2254        /*
2255         * Lengthen the size of the file. We must ensure that the
2256         * last byte of the file is allocated. Since the smallest
2257         * value of filebytes is 0, length will be at least 1.
2258         */
2259        if (length > filebytes) {
2260                moreBytesRequested = length - filebytes;
2261                
2262#if QUOTA
2263                retval = hfs_chkdq(cp,
2264                                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), 
2265                                cred, 0);
2266                if (retval)
2267                        goto Err_Exit;
2268
2269#endif /* QUOTA */
2270                /*
2271                 * Metadata zone checks.
2272                 */
2273                if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2274                        /*
2275                         * Allocate Journal and Quota files in metadata zone.
2276                         */
2277                        if (hfs_virtualmetafile(cp)) {
2278                                extendFlags |= kEFMetadataMask;
2279                                blockHint = hfsmp->hfs_metazone_start;
2280                        } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2281                                   (blockHint <= hfsmp->hfs_metazone_end)) {
2282                                /*
2283                                 * Move blockHint outside metadata zone.
2284                                 */
2285                                blockHint = hfsmp->hfs_metazone_end + 1;
2286                        }
2287                }
2288
2289                if (hfs_start_transaction(hfsmp) != 0) {
2290                    retval = EINVAL;
2291                    goto Err_Exit;
2292                }
2293
2294                /* Protect extents b-tree and allocation bitmap */
2295                lockflags = SFL_BITMAP;
2296                if (overflow_extents(fp))
2297                        lockflags |= SFL_EXTENTS;
2298                lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2299
2300                retval = MacToVFSError(ExtendFileC(vcb,
2301                                                (FCB*)fp,
2302                                                moreBytesRequested,
2303                                                blockHint,
2304                                                extendFlags,
2305                                                &actualBytesAdded));
2306
2307                *(ap->a_bytesallocated) = actualBytesAdded;
2308                filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2309
2310                hfs_systemfile_unlock(hfsmp, lockflags);
2311
2312                if (hfsmp->jnl) {
2313                        (void) hfs_update(vp, TRUE);
2314                        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2315                }
2316
2317                hfs_end_transaction(hfsmp);
2318
2319                /*
2320                 * if we get an error and no changes were made then exit
2321                 * otherwise we must do the hfs_update to reflect the changes
2322                 */
2323                if (retval && (startingPEOF == filebytes))
2324                        goto Err_Exit;
2325        
2326                /*
2327                 * Adjust actualBytesAdded to be allocation block aligned, not
2328                 * clump size aligned.
2329                 * NOTE: So what we are reporting does not affect reality
2330                 * until the file is closed, when we truncate the file to allocation
2331                 * block size.
2332                 */
2333                if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2334                        *(ap->a_bytesallocated) =
2335                                roundup(moreBytesRequested, (off_t)vcb->blockSize);
2336
2337        } else { /* Shorten the size of the file */
2338
2339                if (fp->ff_size > length) {
2340                        /*
2341                         * Any buffers that are past the truncation point need to be
2342                         * invalidated (to maintain buffer cache consistency).
2343                         */
2344                }
2345
2346                if (hfs_start_transaction(hfsmp) != 0) {
2347                    retval = EINVAL;
2348                    goto Err_Exit;
2349                }
2350
2351                /* Protect extents b-tree and allocation bitmap */
2352                lockflags = SFL_BITMAP;
2353                if (overflow_extents(fp))
2354                        lockflags |= SFL_EXTENTS;
2355                lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2356
2357                retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2358
2359                hfs_systemfile_unlock(hfsmp, lockflags);
2360
2361                filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2362
2363                if (hfsmp->jnl) {
2364                        (void) hfs_update(vp, TRUE);
2365                        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2366                }
2367
2368                hfs_end_transaction(hfsmp);
2369                
2370
2371                /*
2372                 * if we get an error and no changes were made then exit
2373                 * otherwise we must do the hfs_update to reflect the changes
2374                 */
2375                if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2376#if QUOTA
2377                /* These are  bytesreleased */
2378                (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2379#endif /* QUOTA */
2380
2381                if (fp->ff_size > filebytes) {
2382                        fp->ff_size = filebytes;
2383
2384                        hfs_unlock(cp);
2385                        ubc_setsize(vp, fp->ff_size);
2386                        hfs_lock(cp, HFS_FORCE_LOCK);
2387                }
2388        }
2389
2390Std_Exit:
2391        cp->c_touch_chgtime = TRUE;
2392        cp->c_touch_modtime = TRUE;
2393        retval2 = hfs_update(vp, MNT_WAIT);
2394
2395        if (retval == 0)
2396                retval = retval2;
2397Err_Exit:
2398        hfs_unlock(cp);
2399        return (retval);
2400}
2401
2402
2403/*
2404 * Pagein for HFS filesystem
2405 */
2406int
2407hfs_vnop_pagein(struct vnop_pagein_args *ap)
2408/*
2409        struct vnop_pagein_args {
2410                vnode_t a_vp,
2411                upl_t         a_pl,
2412                vm_offset_t   a_pl_offset,
2413                off_t         a_f_offset,
2414                size_t        a_size,
2415                int           a_flags
2416                vfs_context_t a_context;
2417        };
2418*/
2419{
2420        vnode_t vp = ap->a_vp;
2421        int error;
2422
2423        error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2424                               ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2425        /*
2426         * Keep track of blocks read.
2427         */
2428        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2429                struct cnode *cp;
2430                struct filefork *fp;
2431                int bytesread;
2432                int took_cnode_lock = 0;
2433                
2434                cp = VTOC(vp);
2435                fp = VTOF(vp);
2436
2437                if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2438                        bytesread = fp->ff_size;
2439                else
2440                        bytesread = ap->a_size;
2441
2442                /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2443                if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2444                        hfs_lock(cp, HFS_FORCE_LOCK);
2445                        took_cnode_lock = 1;
2446                }
2447                /*
2448                 * If this file hasn't been seen since the start of
2449                 * the current sampling period then start over.
2450                 */
2451                if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2452                        struct timeval tv;
2453
2454                        fp->ff_bytesread = bytesread;
2455                        microtime(&tv);
2456                        cp->c_atime = tv.tv_sec;
2457                } else {
2458                        fp->ff_bytesread += bytesread;
2459                }
2460                cp->c_touch_acctime = TRUE;
2461                if (took_cnode_lock)
2462                        hfs_unlock(cp);
2463        }
2464        return (error);
2465}
2466
2467/* 
2468 * Pageout for HFS filesystem.
2469 */
2470int
2471hfs_vnop_pageout(struct vnop_pageout_args *ap)
2472/*
2473        struct vnop_pageout_args {
2474           vnode_t a_vp,
2475           upl_t         a_pl,
2476           vm_offset_t   a_pl_offset,
2477           off_t         a_f_offset,
2478           size_t        a_size,
2479           int           a_flags
2480           vfs_context_t a_context;
2481        };
2482*/
2483{
2484        vnode_t vp = ap->a_vp;
2485        struct cnode *cp;
2486        struct filefork *fp;
2487        int retval;
2488        off_t end_of_range;
2489        off_t filesize;
2490
2491        cp = VTOC(vp);
2492        if (cp->c_lockowner == current_thread()) {
2493                panic("pageout: %s cnode lock already held!\n",
2494                      cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2495        }
2496        if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2497                if (!(ap->a_flags & UPL_NOCOMMIT)) {
2498                        ubc_upl_abort_range(ap->a_pl,
2499                                            ap->a_pl_offset,
2500                                            ap->a_size,
2501                                            UPL_ABORT_FREE_ON_EMPTY);
2502                }
2503                return (retval);
2504        }
2505        fp = VTOF(vp);
2506
2507        filesize = fp->ff_size;
2508        end_of_range = ap->a_f_offset + ap->a_size - 1;
2509
2510        if (end_of_range >= filesize) {
2511                end_of_range = (off_t)(filesize - 1);
2512        }
2513        if (ap->a_f_offset < filesize) {
2514                rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2515                cp->c_flag |= C_MODIFIED;  /* leof is dirty */
2516        }
2517        hfs_unlock(cp);
2518
2519        retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2520                                 ap->a_size, filesize, ap->a_flags);
2521
2522        /*
2523         * If data was written, and setuid or setgid bits are set and
2524         * this process is not the superuser then clear the setuid and
2525         * setgid bits as a precaution against tampering.
2526         */
2527        if ((retval == 0) &&
2528            (cp->c_mode & (S_ISUID | S_ISGID)) &&
2529            (vfs_context_suser(ap->a_context) != 0)) {
2530                hfs_lock(cp, HFS_FORCE_LOCK);
2531                cp->c_mode &= ~(S_ISUID | S_ISGID);
2532                cp->c_touch_chgtime = TRUE;
2533                hfs_unlock(cp);
2534        }
2535        return (retval);
2536}
2537
2538/*
2539 * Intercept B-Tree node writes to unswap them if necessary.
2540 */
2541int
2542hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2543{
2544        int retval = 0;
2545        register struct buf *bp = ap->a_bp;
2546        register struct vnode *vp = buf_vnode(bp);
2547        BlockDescriptor block;
2548
2549        /* Trap B-Tree writes */
2550        if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2551            (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2552            (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2553
2554                /* 
2555                 * Swap and validate the node if it is in native byte order.
2556                 * This is always be true on big endian, so we always validate
2557                 * before writing here.  On little endian, the node typically has
2558                 * been swapped and validatated when it was written to the journal,
2559                 * so we won't do anything here.
2560                 */
2561                if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2562                        /* Prepare the block pointer */
2563                        block.blockHeader = bp;
2564                        block.buffer = (char *)buf_dataptr(bp);
2565                        block.blockNum = buf_lblkno(bp);
2566                        /* not found in cache ==> came from disk */
2567                        block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2568                        block.blockSize = buf_count(bp);
2569    
2570                        /* Endian un-swap B-Tree node */
2571                        retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2572                        if (retval)
2573                                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2574                }
2575        }
2576
2577        /* This buffer shouldn't be locked anymore but if it is clear it */
2578        if ((buf_flags(bp) & B_LOCKED)) {
2579                // XXXdbg
2580                if (VTOHFS(vp)->jnl) {
2581                        panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2582                }
2583                buf_clearflags(bp, B_LOCKED);
2584        }
2585        retval = vn_bwrite (ap);
2586
2587        return (retval);
2588}
2589
2590/*
2591 * Relocate a file to a new location on disk
2592 *  cnode must be locked on entry
2593 *
2594 * Relocation occurs by cloning the file's data from its
2595 * current set of blocks to a new set of blocks. During
2596 * the relocation all of the blocks (old and new) are
2597 * owned by the file.
2598 *
2599 * -----------------
2600 * |///////////////|
2601 * -----------------
2602 * 0               N (file offset)
2603 *
2604 * -----------------     -----------------
2605 * |///////////////|     |               |     STEP 1 (aquire new blocks)
2606 * -----------------     -----------------
2607 * 0               N     N+1             2N
2608 *
2609 * -----------------     -----------------
2610 * |///////////////|     |///////////////|     STEP 2 (clone data)
2611 * -----------------     -----------------
2612 * 0               N     N+1             2N
2613 *
2614 *                       -----------------
2615 *                       |///////////////|     STEP 3 (head truncate blocks)
2616 *                       -----------------
2617 *                       0               N
2618 *
2619 * During steps 2 and 3 page-outs to file offsets less
2620 * than or equal to N are suspended.
2621 *
2622 * During step 3 page-ins to the file get supended.
2623 */
2624__private_extern__
2625int
2626hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
2627        struct  proc *p)
2628{
2629        struct  cnode *cp;
2630        struct  filefork *fp;
2631        struct  hfsmount *hfsmp;
2632        u_int32_t  headblks;
2633        u_int32_t  datablks;
2634        u_int32_t  blksize;
2635        u_int32_t  growsize;
2636        u_int32_t  nextallocsave;
2637        daddr64_t  sector_a,  sector_b;
2638        int disabled_caching = 0;
2639        int eflags;
2640        off_t  newbytes;
2641        int  retval;
2642        int lockflags = 0;
2643        int took_trunc_lock = 0;
2644        int started_tr = 0;
2645        enum vtype vnodetype;
2646
2647        vnodetype = vnode_vtype(vp);
2648        if (vnodetype != VREG && vnodetype != VLNK) {
2649                return (EPERM);
2650        }
2651        
2652        hfsmp = VTOHFS(vp);
2653        if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2654                return (ENOSPC);
2655        }
2656
2657        cp = VTOC(vp);
2658        fp = VTOF(vp);
2659        if (fp->ff_unallocblocks)
2660                return (EINVAL);
2661        blksize = hfsmp->blockSize;
2662        if (blockHint == 0)
2663                blockHint = hfsmp->nextAllocation;
2664
2665        if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2666            ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2667                return (EFBIG);
2668        }
2669
2670        //
2671        // We do not believe that this call to hfs_fsync() is
2672        // necessary and it causes a journal transaction
2673        // deadlock so we are removing it.
2674        //
2675        //if (vnodetype == VREG && !vnode_issystem(vp)) {
2676        //      retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2677        //      if (retval)
2678        //              return (retval);
2679        //}
2680
2681        if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2682                hfs_unlock(cp);
2683                hfs_lock_truncate(cp, TRUE);
2684                if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2685                        hfs_unlock_truncate(cp);
2686                        return (retval);
2687                }
2688                took_trunc_lock = 1;
2689        }
2690        headblks = fp->ff_blocks;
2691        datablks = howmany(fp->ff_size, blksize);
2692        growsize = datablks * blksize;
2693        eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2694        if (blockHint >= hfsmp->hfs_metazone_start &&
2695            blockHint <= hfsmp->hfs_metazone_end)
2696                eflags |= kEFMetadataMask;
2697
2698        if (hfs_start_transaction(hfsmp) != 0) {
2699                if (took_trunc_lock)
2700                        hfs_unlock_truncate(cp);
2701            return (EINVAL);
2702        }
2703        started_tr = 1;
2704        /*
2705         * Protect the extents b-tree and the allocation bitmap
2706         * during MapFileBlockC and ExtendFileC operations.
2707         */
2708        lockflags = SFL_BITMAP;
2709        if (overflow_extents(fp))
2710                lockflags |= SFL_EXTENTS;
2711        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2712
2713        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2714        if (retval) {
2715                retval = MacToVFSError(retval);
2716                goto out;
2717        }
2718
2719        /*
2720         * STEP 1 - aquire new allocation blocks.
2721         */
2722        if (!vnode_isnocache(vp)) {
2723                vnode_setnocache(vp);
2724                disabled_caching = 1;
2725
2726        }
2727        nextallocsave = hfsmp->nextAllocation;
2728        retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2729        if (eflags & kEFMetadataMask) {
2730                HFS_MOUNT_LOCK(hfsmp, TRUE);
2731                hfsmp->nextAllocation = nextallocsave;
2732                hfsmp->vcbFlags |= 0xFF00;
2733                HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2734        }
2735
2736        retval = MacToVFSError(retval);
2737        if (retval == 0) {
2738                cp->c_flag |= C_MODIFIED;
2739                if (newbytes < growsize) {
2740                        retval = ENOSPC;
2741                        goto restore;
2742                } else if (fp->ff_blocks < (headblks + datablks)) {
2743                        printf("hfs_relocate: allocation failed");
2744                        retval = ENOSPC;
2745                        goto restore;
2746                }
2747
2748                retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2749                if (retval) {
2750                        retval = MacToVFSError(retval);
2751                } else if ((sector_a + 1) == sector_b) {
2752                        retval = ENOSPC;
2753                        goto restore;
2754                } else if ((eflags & kEFMetadataMask) &&
2755                           ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2756                              hfsmp->hfs_metazone_end)) {
2757                        printf("hfs_relocate: didn't move into metadata zone\n");
2758                        retval = ENOSPC;
2759                        goto restore;
2760                }
2761        }
2762        /* Done with system locks and journal for now. */
2763        hfs_systemfile_unlock(hfsmp, lockflags);
2764        lockflags = 0;
2765        hfs_end_transaction(hfsmp);
2766        started_tr = 0;
2767
2768        if (retval) {
2769                /*
2770                 * Check to see if failure is due to excessive fragmentation.
2771                 */
2772                if ((retval == ENOSPC) &&
2773                    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2774                        hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2775                }
2776                goto out;
2777        }
2778        /*
2779         * STEP 2 - clone file data into the new allocation blocks.
2780         */
2781
2782        if (vnodetype == VLNK)
2783                retval = hfs_clonelink(vp, blksize, cred, p);
2784        else if (vnode_issystem(vp))
2785                retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2786        else
2787                retval = hfs_clonefile(vp, headblks, datablks, blksize);
2788
2789        /* Start transaction for step 3 or for a restore. */
2790        if (hfs_start_transaction(hfsmp) != 0) {
2791                retval = EINVAL;
2792                goto out;
2793        }
2794        started_tr = 1;
2795        if (retval)
2796                goto restore;
2797
2798        /*
2799         * STEP 3 - switch to cloned data and remove old blocks.
2800         */
2801        lockflags = SFL_BITMAP;
2802        if (overflow_extents(fp))
2803                lockflags |= SFL_EXTENTS;
2804        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2805
2806        retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2807
2808        hfs_systemfile_unlock(hfsmp, lockflags);
2809        lockflags = 0;
2810        if (retval)
2811                goto restore;
2812out:
2813        if (took_trunc_lock)
2814                hfs_unlock_truncate(cp);
2815
2816        if (lockflags) {
2817                hfs_systemfile_unlock(hfsmp, lockflags);
2818                lockflags = 0;
2819        }
2820
2821        // See comment up above about calls to hfs_fsync()
2822        //
2823        //if (retval == 0)
2824        //      retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2825
2826        if (hfsmp->jnl) {
2827                if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2828                        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2829                else
2830                        (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2831        }
2832exit:
2833        if (disabled_caching) {
2834                vnode_clearnocache(vp);
2835        }
2836        if (started_tr)
2837                hfs_end_transaction(hfsmp);
2838
2839        return (retval);
2840
2841restore:
2842        if (fp->ff_blocks == headblks)
2843                goto exit;
2844        /*
2845         * Give back any newly allocated space.
2846         */
2847        if (lockflags == 0) {
2848                lockflags = SFL_BITMAP;
2849                if (overflow_extents(fp))
2850                        lockflags |= SFL_EXTENTS;
2851                lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2852        }
2853
2854        (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2855
2856        hfs_systemfile_unlock(hfsmp, lockflags);
2857        lockflags = 0;
2858
2859        if (took_trunc_lock)
2860                hfs_unlock_truncate(cp);
2861        goto exit;
2862}
2863
2864
2865/*
2866 * Clone a symlink.
2867 *
2868 */
2869static int
2870hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2871{
2872        struct buf *head_bp = NULL;
2873        struct buf *tail_bp = NULL;
2874        int error;
2875
2876
2877        error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2878        if (error)
2879                goto out;
2880
2881        tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2882        if (tail_bp == NULL) {
2883                error = EIO;
2884                goto out;
2885        }
2886        bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2887        error = (int)buf_bwrite(tail_bp);
2888out:
2889        if (head_bp) {
2890                buf_markinvalid(head_bp);
2891                buf_brelse(head_bp);
2892        }       
2893        (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2894
2895        return (error);
2896}
2897
2898/*
2899 * Clone a file's data within the file.
2900 *
2901 */
2902static int
2903hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2904{
2905        caddr_t  bufp;
2906        size_t  writebase;
2907        size_t  bufsize;
2908        size_t  copysize;
2909        size_t  iosize;
2910        off_t   filesize;
2911        size_t  offset;
2912        uio_t auio;
2913        int  error = 0;
2914
2915        filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
2916        writebase = blkstart * blksize;
2917        copysize = blkcnt * blksize;
2918        iosize = bufsize = MIN(copysize, 4096 * 16);
2919        offset = 0;
2920
2921        if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2922                return (ENOMEM);
2923        }       
2924        hfs_unlock(VTOC(vp));
2925
2926        auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2927
2928        while (offset < copysize) {
2929                iosize = MIN(copysize - offset, iosize);
2930
2931                uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2932                uio_addiov(auio, (uintptr_t)bufp, iosize);
2933
2934                error = cluster_read(vp, auio, copysize, 0);
2935                if (error) {
2936                        printf("hfs_clonefile: cluster_read failed - %d\n", error);
2937                        break;
2938                }
2939                if (uio_resid(auio) != 0) {
2940                        printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2941                        error = EIO;            
2942                        break;
2943                }
2944
2945                uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2946                uio_addiov(auio, (uintptr_t)bufp, iosize);
2947
2948                error = cluster_write(vp, auio, filesize + offset,
2949                                      filesize + offset + iosize,
2950                                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2951                if (error) {
2952                        printf("hfs_clonefile: cluster_write failed - %d\n", error);
2953                        break;
2954                }
2955                if (uio_resid(auio) != 0) {
2956                        printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2957                        error = EIO;            
2958                        break;
2959                }       
2960                offset += iosize;
2961        }
2962        uio_free(auio);
2963
2964        /*
2965         * No need to call ubc_sync_range or hfs_invalbuf
2966         * since the file was copied using IO_NOCACHE.
2967         */
2968
2969        kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2970
2971        hfs_lock(VTOC(vp), HFS_FORCE_LOCK);     
2972        return (error);
2973}
2974
2975/*
2976 * Clone a system (metadata) file.
2977 *
2978 */
2979static int
2980hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2981                 kauth_cred_t cred, struct proc *p)
2982{
2983        caddr_t  bufp;
2984        char * offset;
2985        size_t  bufsize;
2986        size_t  iosize;
2987        struct buf *bp = NULL;
2988        daddr64_t  blkno;
2989        daddr64_t  blk;
2990        daddr64_t  start_blk;
2991        daddr64_t  last_blk;
2992        int  breadcnt;
2993        int  i;
2994        int  error = 0;
2995
2996
2997        iosize = GetLogicalBlockSize(vp);
2998        bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2999        breadcnt = bufsize / iosize;
3000
3001        if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3002                return (ENOMEM);
3003        }       
3004        start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3005        last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
3006        blkno = 0;
3007
3008        while (blkno < last_blk) {
3009                /*
3010                 * Read up to a megabyte
3011                 */
3012                offset = bufp;
3013                for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3014                        error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3015                        if (error) {
3016                                printf("hfs_clonesysfile: meta_bread error %d\n", error);
3017                                goto out;
3018                        }
3019                        if (buf_count(bp) != iosize) {
3020                                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3021                                goto out;
3022                        }
3023                        bcopy((char *)buf_dataptr(bp), offset, iosize);
3024
3025                        buf_markinvalid(bp);
3026                        buf_brelse(bp);
3027                        bp = NULL;
3028
3029                        offset += iosize;
3030                }
3031        
3032                /*
3033                 * Write up to a megabyte
3034                 */
3035                offset = bufp;
3036                for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3037                        bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3038                        if (bp == NULL) {
3039                                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3040                                error = EIO;
3041                                goto out;
3042                        }
3043                        bcopy(offset, (char *)buf_dataptr(bp), iosize);
3044                        error = (int)buf_bwrite(bp);
3045                        bp = NULL;
3046                        if (error)
3047                                goto out;
3048                        offset += iosize;
3049                }
3050        }
3051out:
3052        if (bp) {
3053                buf_brelse(bp);
3054        }
3055
3056        kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3057
3058        error = hfs_fsync(vp, MNT_WAIT, 0, p);
3059
3060        return (error);
3061}
3062
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.