darwin-xnu/bsd/vfs/vfs_subr.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3 *
   4 * @APPLE_LICENSE_HEADER_START@
   5 * 
   6 * The contents of this file constitute Original Code as defined in and
   7 * are subject to the Apple Public Source License Version 1.1 (the
   8 * "License").  You may not use this file except in compliance with the
   9 * License.  Please obtain a copy of the License at
  10 * http://www.apple.com/publicsource and read it before using this file.
  11 * 
  12 * This Original Code and all software distributed under the License are
  13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17 * License for the specific language governing rights and limitations
  18 * under the License.
  19 * 
  20 * @APPLE_LICENSE_HEADER_END@
  21 */
  22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
  23/*
  24 * Copyright (c) 1989, 1993
  25 *      The Regents of the University of California.  All rights reserved.
  26 * (c) UNIX System Laboratories, Inc.
  27 * All or some portions of this file are derived from material licensed
  28 * to the University of California by American Telephone and Telegraph
  29 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  30 * the permission of UNIX System Laboratories, Inc.
  31 *
  32 * Redistribution and use in source and binary forms, with or without
  33 * modification, are permitted provided that the following conditions
  34 * are met:
  35 * 1. Redistributions of source code must retain the above copyright
  36 *    notice, this list of conditions and the following disclaimer.
  37 * 2. Redistributions in binary form must reproduce the above copyright
  38 *    notice, this list of conditions and the following disclaimer in the
  39 *    documentation and/or other materials provided with the distribution.
  40 * 3. All advertising materials mentioning features or use of this software
  41 *    must display the following acknowledgement:
  42 *      This product includes software developed by the University of
  43 *      California, Berkeley and its contributors.
  44 * 4. Neither the name of the University nor the names of its contributors
  45 *    may be used to endorse or promote products derived from this software
  46 *    without specific prior written permission.
  47 *
  48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58 * SUCH DAMAGE.
  59 *
  60 *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95
  61 */
  62
  63/*
  64 * External virtual filesystem routines
  65 */
  66
  67#undef  DIAGNOSTIC
  68#define DIAGNOSTIC 1
  69
  70#include <sys/param.h>
  71#include <sys/systm.h>
  72#include <sys/proc_internal.h>
  73#include <sys/kauth.h>
  74#include <sys/mount_internal.h>
  75#include <sys/time.h>
  76#include <sys/lock.h>
  77#include <sys/vnode_internal.h>
  78#include <sys/stat.h>
  79#include <sys/namei.h>
  80#include <sys/ucred.h>
  81#include <sys/buf_internal.h>
  82#include <sys/errno.h>
  83#include <sys/malloc.h>
  84#include <sys/domain.h>
  85#include <sys/mbuf.h>
  86#include <sys/syslog.h>
  87#include <sys/ubc_internal.h>
  88#include <sys/vm.h>
  89#include <sys/sysctl.h>
  90#include <sys/filedesc.h>
  91#include <sys/event.h>
  92#include <sys/kdebug.h>
  93#include <sys/kauth.h>
  94#include <sys/user.h>
  95#include <miscfs/fifofs/fifo.h>
  96
  97#include <string.h>
  98#include <machine/spl.h>
  99
 100
 101#include <kern/assert.h>
 102
 103#include <miscfs/specfs/specdev.h>
 104
 105#include <mach/mach_types.h>
 106#include <mach/memory_object_types.h>
 107
 108extern lck_grp_t *vnode_lck_grp;
 109extern lck_attr_t *vnode_lck_attr;
 110
 111
 112extern lck_mtx_t * mnt_list_mtx_lock;
 113
 114enum vtype iftovt_tab[16] = {
 115        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 116        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
 117};
 118int     vttoif_tab[9] = {
 119        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 120        S_IFSOCK, S_IFIFO, S_IFMT,
 121};
 122
 123extern int ubc_isinuse_locked(vnode_t, int, int);
 124extern kern_return_t adjust_vm_object_cache(vm_size_t oval, vm_size_t nval);
 125
 126static void vnode_list_add(vnode_t);
 127static void vnode_list_remove(vnode_t);
 128
 129static errno_t vnode_drain(vnode_t);
 130static void vgone(vnode_t);
 131static void vclean(vnode_t vp, int flag, proc_t p);
 132static void vnode_reclaim_internal(vnode_t, int, int);
 133
 134static void vnode_dropiocount (vnode_t, int);
 135static errno_t vnode_getiocount(vnode_t vp, int locked, int vid, int vflags);
 136static int vget_internal(vnode_t, int, int);
 137
 138static vnode_t checkalias(vnode_t vp, dev_t nvp_rdev);
 139static int  vnode_reload(vnode_t);
 140static int  vnode_isinuse_locked(vnode_t, int, int);
 141
 142static void insmntque(vnode_t vp, mount_t mp);
 143mount_t mount_list_lookupby_fsid(fsid_t *, int, int);
 144static int mount_getvfscnt(void);
 145static int mount_fillfsids(fsid_t *, int );
 146static void vnode_iterate_setup(mount_t);
 147static int vnode_umount_preflight(mount_t, vnode_t, int);
 148static int vnode_iterate_prepare(mount_t);
 149static int vnode_iterate_reloadq(mount_t);
 150static void vnode_iterate_clear(mount_t);
 151
 152TAILQ_HEAD(freelst, vnode) vnode_free_list;     /* vnode free list */
 153TAILQ_HEAD(inactivelst, vnode) vnode_inactive_list;     /* vnode inactive list */
 154struct mntlist mountlist;                       /* mounted filesystem list */
 155static int nummounts = 0;
 156
 157#if DIAGNOSTIC
 158#define VLISTCHECK(fun, vp, list)       \
 159        if ((vp)->v_freelist.tqe_prev == (struct vnode **)0xdeadb) \
 160                panic("%s: %s vnode not on %slist", (fun), (list), (list));
 161
 162#define VINACTIVECHECK(fun, vp, expected)       \
 163        do {    \
 164                int __is_inactive = ISSET((vp)->v_flag, VUINACTIVE);    \
 165                if (__is_inactive ^ expected)   \
 166                        panic("%s: %sinactive vnode, expected %s", (fun),       \
 167                                __is_inactive? "" : "not ",     \
 168                                expected? "inactive": "not inactive"); \
 169        } while(0)
 170#else
 171#define VLISTCHECK(fun, vp, list)
 172#define VINACTIVECHECK(fun, vp, expected)
 173#endif /* DIAGNOSTIC */
 174
 175#define VLISTNONE(vp)   \
 176        do {    \
 177                (vp)->v_freelist.tqe_next = (struct vnode *)0;  \
 178                (vp)->v_freelist.tqe_prev = (struct vnode **)0xdeadb;   \
 179        } while(0)
 180
 181#define VONLIST(vp)     \
 182        ((vp)->v_freelist.tqe_prev != (struct vnode **)0xdeadb)
 183
 184/* remove a vnode from free vnode list */
 185#define VREMFREE(fun, vp)       \
 186        do {    \
 187                VLISTCHECK((fun), (vp), "free");        \
 188                TAILQ_REMOVE(&vnode_free_list, (vp), v_freelist);       \
 189                VLISTNONE((vp));        \
 190                freevnodes--;   \
 191        } while(0)
 192
 193/* remove a vnode from inactive vnode list */
 194#define VREMINACTIVE(fun, vp)   \
 195        do {    \
 196                VLISTCHECK((fun), (vp), "inactive"); \
 197                VINACTIVECHECK((fun), (vp), VUINACTIVE); \
 198                TAILQ_REMOVE(&vnode_inactive_list, (vp), v_freelist); \
 199                CLR((vp)->v_flag, VUINACTIVE); \
 200                VLISTNONE((vp));        \
 201                inactivevnodes--;       \
 202        } while(0)
 203
 204/*
 205 * Have to declare first two locks as actual data even if !MACH_SLOCKS, since
 206 * a pointers to them get passed around.
 207 */
 208void * mntvnode_slock;
 209void * mntid_slock;
 210void * spechash_slock;
 211
 212/*
 213 * vnodetarget is the amount of vnodes we expect to get back 
 214 * from the the inactive vnode list and VM object cache.
 215 * As vnreclaim() is a mainly cpu bound operation for faster 
 216 * processers this number could be higher.
 217 * Having this number too high introduces longer delays in 
 218 * the execution of new_vnode().
 219 */
 220unsigned long vnodetarget;              /* target for vnreclaim() */
 221#define VNODE_FREE_TARGET       20      /* Default value for vnodetarget */
 222
 223/*
 224 * We need quite a few vnodes on the free list to sustain the
 225 * rapid stat() the compilation process does, and still benefit from the name
 226 * cache. Having too few vnodes on the free list causes serious disk
 227 * thrashing as we cycle through them.
 228 */
 229#define VNODE_FREE_MIN          300     /* freelist should have at least these many */
 230
 231/*
 232 * We need to get vnodes back from the VM object cache when a certain #
 233 * of vnodes are reused from the freelist. This is essential for the
 234 * caching to be effective in the namecache and the buffer cache [for the
 235 * metadata].
 236 */
 237#define VNODE_TOOMANY_REUSED    (VNODE_FREE_MIN/4)
 238
 239/*
 240 * If we have enough vnodes on the freelist we do not want to reclaim
 241 * the vnodes from the VM object cache.
 242 */
 243#define VNODE_FREE_ENOUGH       (VNODE_FREE_MIN + (VNODE_FREE_MIN/2))
 244
 245/*
 246 * Initialize the vnode management data structures.
 247 */
 248__private_extern__ void
 249vntblinit(void)
 250{
 251        TAILQ_INIT(&vnode_free_list);
 252        TAILQ_INIT(&vnode_inactive_list);
 253        TAILQ_INIT(&mountlist);
 254
 255        if (!vnodetarget)
 256                vnodetarget = VNODE_FREE_TARGET;
 257
 258        /*
 259         * Scale the vm_object_cache to accomodate the vnodes 
 260         * we want to cache
 261         */
 262        (void) adjust_vm_object_cache(0, desiredvnodes - VNODE_FREE_MIN);
 263}
 264
 265/* Reset the VM Object Cache with the values passed in */
 266__private_extern__ kern_return_t
 267reset_vmobjectcache(unsigned int val1, unsigned int val2)
 268{
 269        vm_size_t oval = val1 - VNODE_FREE_MIN;
 270        vm_size_t nval;
 271        
 272        if(val2 < VNODE_FREE_MIN)
 273                nval = 0;
 274        else
 275                nval = val2 - VNODE_FREE_MIN;
 276
 277        return(adjust_vm_object_cache(oval, nval));
 278}
 279
 280
 281/* the timeout is in 10 msecs */
 282int
 283vnode_waitforwrites(vnode_t vp, int output_target, int slpflag, int slptimeout, char *msg) {
 284        int error = 0;
 285        struct timespec ts;
 286
 287        KERNEL_DEBUG(0x3010280 | DBG_FUNC_START, (int)vp, output_target, vp->v_numoutput, 0, 0);
 288
 289        if (vp->v_numoutput > output_target) {
 290
 291                slpflag &= ~PDROP;
 292
 293                vnode_lock(vp);
 294
 295                while ((vp->v_numoutput > output_target) && error == 0) {
 296                        if (output_target)
 297                                vp->v_flag |= VTHROTTLED;
 298                        else
 299                                vp->v_flag |= VBWAIT;
 300                        ts.tv_sec = (slptimeout/100);
 301                        ts.tv_nsec = (slptimeout % 1000)  * 10 * NSEC_PER_USEC * 1000 ;
 302                        error = msleep((caddr_t)&vp->v_numoutput, &vp->v_lock, (slpflag | (PRIBIO + 1)), msg, &ts);
 303                }
 304                vnode_unlock(vp);
 305        }
 306        KERNEL_DEBUG(0x3010280 | DBG_FUNC_END, (int)vp, output_target, vp->v_numoutput, error, 0);
 307
 308        return error;
 309}
 310
 311
 312void
 313vnode_startwrite(vnode_t vp) {
 314
 315        OSAddAtomic(1, &vp->v_numoutput);
 316}
 317
 318
 319void
 320vnode_writedone(vnode_t vp)
 321{
 322        if (vp) {
 323                int need_wakeup = 0;
 324          
 325                OSAddAtomic(-1, &vp->v_numoutput);
 326
 327                vnode_lock(vp);
 328
 329                if (vp->v_numoutput < 0)
 330                        panic("vnode_writedone: numoutput < 0");
 331
 332                if ((vp->v_flag & VTHROTTLED) && (vp->v_numoutput < (VNODE_ASYNC_THROTTLE / 3))) {
 333                        vp->v_flag &= ~VTHROTTLED;
 334                        need_wakeup = 1;
 335                }
 336                if ((vp->v_flag & VBWAIT) && (vp->v_numoutput == 0)) {
 337                        vp->v_flag &= ~VBWAIT;
 338                        need_wakeup = 1;
 339                }
 340                vnode_unlock(vp);
 341                
 342                if (need_wakeup)
 343                        wakeup((caddr_t)&vp->v_numoutput);
 344        }
 345}
 346
 347
 348
 349int
 350vnode_hasdirtyblks(vnode_t vp)
 351{
 352        struct cl_writebehind *wbp;
 353
 354        /*
 355         * Not taking the buf_mtxp as there is little
 356         * point doing it. Even if the lock is taken the
 357         * state can change right after that. If their 
 358         * needs to be a synchronization, it must be driven
 359         * by the caller
 360         */ 
 361        if (vp->v_dirtyblkhd.lh_first)
 362                return (1);
 363        
 364        if (!UBCINFOEXISTS(vp))
 365                return (0);
 366
 367        wbp = vp->v_ubcinfo->cl_wbehind;
 368
 369        if (wbp && (wbp->cl_number || wbp->cl_scmap))
 370                return (1);
 371
 372        return (0);
 373}
 374
 375int
 376vnode_hascleanblks(vnode_t vp)
 377{
 378        /*
 379         * Not taking the buf_mtxp as there is little
 380         * point doing it. Even if the lock is taken the
 381         * state can change right after that. If their 
 382         * needs to be a synchronization, it must be driven
 383         * by the caller
 384         */ 
 385        if (vp->v_cleanblkhd.lh_first)
 386                return (1);
 387        return (0);
 388}
 389
 390void
 391vnode_iterate_setup(mount_t mp)
 392{
 393        while (mp->mnt_lflag & MNT_LITER) {
 394                mp->mnt_lflag |= MNT_LITERWAIT;
 395                msleep((caddr_t)mp, &mp->mnt_mlock, PVFS, "vnode_iterate_setup", 0);    
 396        }
 397
 398        mp->mnt_lflag |= MNT_LITER;
 399
 400}
 401
 402static int
 403vnode_umount_preflight(mount_t mp, vnode_t skipvp, int flags)
 404{
 405        vnode_t vp;
 406
 407        TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 408                if (vp->v_type == VDIR)
 409                        continue;
 410                if (vp == skipvp)
 411                        continue;
 412                if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) ||
 413            (vp->v_flag & VNOFLUSH)))
 414                        continue;
 415                if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP))
 416                        continue;
 417                if ((flags & WRITECLOSE) &&
 418            (vp->v_writecount == 0 || vp->v_type != VREG)) 
 419                        continue;
 420                /* Look for busy vnode */
 421        if (((vp->v_usecount != 0) &&
 422            ((vp->v_usecount - vp->v_kusecount) != 0))) 
 423                        return(1);
 424                }
 425        
 426        return(0);
 427}
 428
 429/* 
 430 * This routine prepares iteration by moving all the vnodes to worker queue
 431 * called with mount lock held
 432 */
 433int
 434vnode_iterate_prepare(mount_t mp)
 435{
 436        vnode_t vp;
 437
 438        if (TAILQ_EMPTY(&mp->mnt_vnodelist)) {
 439                /* nothing to do */
 440                return (0);
 441        } 
 442
 443        vp = TAILQ_FIRST(&mp->mnt_vnodelist);
 444        vp->v_mntvnodes.tqe_prev = &(mp->mnt_workerqueue.tqh_first);
 445        mp->mnt_workerqueue.tqh_first = mp->mnt_vnodelist.tqh_first;
 446        mp->mnt_workerqueue.tqh_last = mp->mnt_vnodelist.tqh_last;
 447
 448        TAILQ_INIT(&mp->mnt_vnodelist);
 449        if (mp->mnt_newvnodes.tqh_first != NULL)
 450                panic("vnode_iterate_prepare: newvnode when entering vnode");
 451        TAILQ_INIT(&mp->mnt_newvnodes);
 452
 453        return (1);
 454}
 455
 456
 457/* called with mount lock held */
 458int 
 459vnode_iterate_reloadq(mount_t mp)
 460{
 461        int moved = 0;
 462
 463        /* add the remaining entries in workerq to the end of mount vnode list */
 464        if (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
 465                struct vnode * mvp;
 466                mvp = TAILQ_LAST(&mp->mnt_vnodelist, vnodelst);
 467                
 468                /* Joining the workerque entities to mount vnode list */
 469                if (mvp)
 470                        mvp->v_mntvnodes.tqe_next = mp->mnt_workerqueue.tqh_first;
 471                else
 472                        mp->mnt_vnodelist.tqh_first = mp->mnt_workerqueue.tqh_first;
 473                mp->mnt_workerqueue.tqh_first->v_mntvnodes.tqe_prev = mp->mnt_vnodelist.tqh_last;
 474                mp->mnt_vnodelist.tqh_last = mp->mnt_workerqueue.tqh_last;
 475                TAILQ_INIT(&mp->mnt_workerqueue);
 476        }
 477
 478        /* add the newvnodes to the head of mount vnode list */
 479        if (!TAILQ_EMPTY(&mp->mnt_newvnodes)) {
 480                struct vnode * nlvp;
 481                nlvp = TAILQ_LAST(&mp->mnt_newvnodes, vnodelst);
 482                
 483                mp->mnt_newvnodes.tqh_first->v_mntvnodes.tqe_prev = &mp->mnt_vnodelist.tqh_first;
 484                nlvp->v_mntvnodes.tqe_next = mp->mnt_vnodelist.tqh_first;
 485                if(mp->mnt_vnodelist.tqh_first) 
 486                        mp->mnt_vnodelist.tqh_first->v_mntvnodes.tqe_prev = &nlvp->v_mntvnodes.tqe_next;
 487                else
 488                        mp->mnt_vnodelist.tqh_last = mp->mnt_newvnodes.tqh_last;
 489                mp->mnt_vnodelist.tqh_first = mp->mnt_newvnodes.tqh_first;
 490                TAILQ_INIT(&mp->mnt_newvnodes);
 491                moved = 1;
 492        }
 493
 494        return(moved);
 495}
 496
 497
 498void
 499vnode_iterate_clear(mount_t mp)
 500{
 501        mp->mnt_lflag &= ~MNT_LITER;
 502        if (mp->mnt_lflag & MNT_LITERWAIT) {
 503                mp->mnt_lflag &= ~MNT_LITERWAIT;
 504                wakeup(mp);
 505        }
 506}
 507
 508
 509int
 510vnode_iterate(mp, flags, callout, arg)
 511        mount_t mp;
 512        int flags;
 513        int (*callout)(struct vnode *, void *);
 514        void * arg;
 515{
 516        struct vnode *vp;
 517        int vid, retval;
 518        int ret = 0;
 519
 520        mount_lock(mp);
 521
 522        vnode_iterate_setup(mp);
 523
 524        /* it is returns 0 then there is nothing to do */
 525        retval = vnode_iterate_prepare(mp);
 526
 527        if (retval == 0)  {
 528                vnode_iterate_clear(mp);
 529                mount_unlock(mp);
 530                return(ret);
 531        }
 532        
 533        /* iterate over all the vnodes */
 534        while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
 535                vp = TAILQ_FIRST(&mp->mnt_workerqueue);
 536                TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes);
 537                TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
 538                vid = vp->v_id;
 539                if ((vp->v_data == NULL) || (vp->v_type == VNON) || (vp->v_mount != mp)) {
 540                        continue;
 541                }
 542                mount_unlock(mp);
 543
 544                if ( vget_internal(vp, vid, (flags | VNODE_NODEAD| VNODE_WITHID | VNODE_NOSUSPEND))) {
 545                        mount_lock(mp);
 546                        continue;       
 547                }
 548                if (flags & VNODE_RELOAD) {
 549                        /*
 550                         * we're reloading the filesystem
 551                         * cast out any inactive vnodes...
 552                         */
 553                        if (vnode_reload(vp)) {
 554                                /* vnode will be recycled on the refcount drop */
 555                                vnode_put(vp);
 556                                mount_lock(mp);
 557                                continue;
 558                        }
 559                }
 560
 561                retval = callout(vp, arg);
 562
 563                switch (retval) {
 564                  case VNODE_RETURNED:
 565                  case VNODE_RETURNED_DONE:
 566                          vnode_put(vp);
 567                          if (retval == VNODE_RETURNED_DONE) {
 568                                mount_lock(mp);
 569                                ret = 0;
 570                                goto out;
 571                          }
 572                          break;
 573
 574                  case VNODE_CLAIMED_DONE:
 575                                mount_lock(mp);
 576                                ret = 0;
 577                                goto out;
 578                  case VNODE_CLAIMED:
 579                  default:
 580                                break;
 581                }
 582                mount_lock(mp);
 583        }
 584
 585out:
 586        (void)vnode_iterate_reloadq(mp);
 587        vnode_iterate_clear(mp);
 588        mount_unlock(mp);
 589        return (ret);
 590}
 591
 592void
 593mount_lock_renames(mount_t mp)
 594{
 595        lck_mtx_lock(&mp->mnt_renamelock);
 596}
 597
 598void
 599mount_unlock_renames(mount_t mp)
 600{
 601        lck_mtx_unlock(&mp->mnt_renamelock);
 602}
 603
 604void
 605mount_lock(mount_t mp)
 606{
 607        lck_mtx_lock(&mp->mnt_mlock);
 608}
 609
 610void
 611mount_unlock(mount_t mp)
 612{
 613        lck_mtx_unlock(&mp->mnt_mlock);
 614}
 615
 616
 617void
 618mount_ref(mount_t mp, int locked)
 619{
 620        if ( !locked)
 621                mount_lock(mp);
 622        
 623        mp->mnt_count++;
 624
 625        if ( !locked)
 626                mount_unlock(mp);
 627}
 628
 629
 630void
 631mount_drop(mount_t mp, int locked)
 632{
 633        if ( !locked)
 634                mount_lock(mp);
 635        
 636        mp->mnt_count--;
 637
 638        if (mp->mnt_count == 0 && (mp->mnt_lflag & MNT_LDRAIN))
 639                wakeup(&mp->mnt_lflag);
 640
 641        if ( !locked)
 642                mount_unlock(mp);
 643}
 644
 645
 646int
 647mount_iterref(mount_t mp, int locked)
 648{
 649        int retval = 0;
 650
 651        if (!locked)
 652                mount_list_lock();
 653        if (mp->mnt_iterref < 0) {
 654                retval = 1;
 655        } else {
 656                mp->mnt_iterref++;
 657        }
 658        if (!locked)
 659                mount_list_unlock();
 660        return(retval);
 661}
 662
 663int
 664mount_isdrained(mount_t mp, int locked)
 665{
 666        int retval;
 667
 668        if (!locked)
 669                mount_list_lock();
 670        if (mp->mnt_iterref < 0)
 671                retval = 1;
 672        else
 673                retval = 0;     
 674        if (!locked)
 675                mount_list_unlock();
 676        return(retval);
 677}
 678
 679void
 680mount_iterdrop(mount_t mp)
 681{
 682        mount_list_lock();
 683        mp->mnt_iterref--;
 684        wakeup(&mp->mnt_iterref);
 685        mount_list_unlock();
 686}
 687
 688void
 689mount_iterdrain(mount_t mp)
 690{
 691        mount_list_lock();
 692        while (mp->mnt_iterref)
 693                msleep((caddr_t)&mp->mnt_iterref, mnt_list_mtx_lock, PVFS, "mount_iterdrain", 0 );
 694        /* mount iterations drained */
 695        mp->mnt_iterref = -1;
 696        mount_list_unlock();
 697}
 698void
 699mount_iterreset(mount_t mp)
 700{
 701        mount_list_lock();
 702        if (mp->mnt_iterref == -1)
 703                mp->mnt_iterref = 0;
 704        mount_list_unlock();
 705}
 706
 707/* always called with  mount lock held */
 708int 
 709mount_refdrain(mount_t mp)
 710{
 711        if (mp->mnt_lflag & MNT_LDRAIN)
 712                panic("already in drain");
 713        mp->mnt_lflag |= MNT_LDRAIN;
 714
 715        while (mp->mnt_count)
 716                msleep((caddr_t)&mp->mnt_lflag, &mp->mnt_mlock, PVFS, "mount_drain", 0 );
 717
 718        if (mp->mnt_vnodelist.tqh_first != NULL)
 719                 panic("mount_refdrain: dangling vnode"); 
 720
 721        mp->mnt_lflag &= ~MNT_LDRAIN;
 722
 723        return(0);
 724}
 725
 726
 727/*
 728 * Mark a mount point as busy. Used to synchronize access and to delay
 729 * unmounting.
 730 */
 731int
 732vfs_busy(mount_t mp, int flags)
 733{
 734
 735restart:
 736        if (mp->mnt_lflag & MNT_LDEAD)
 737                return(ENOENT);
 738
 739        if (mp->mnt_lflag & MNT_LUNMOUNT) {
 740                if (flags & LK_NOWAIT)
 741                        return (ENOENT);
 742
 743                mount_lock(mp);
 744
 745                if (mp->mnt_lflag & MNT_LDEAD) {
 746                        mount_unlock(mp);
 747                        return(ENOENT);
 748                }
 749                if (mp->mnt_lflag & MNT_LUNMOUNT) {
 750                        mp->mnt_lflag |= MNT_LWAIT;
 751                        /*
 752                         * Since all busy locks are shared except the exclusive
 753                         * lock granted when unmounting, the only place that a
 754                         * wakeup needs to be done is at the release of the
 755                         * exclusive lock at the end of dounmount.
 756                         */
 757                        msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "vfsbusy", 0 );
 758                        return (ENOENT);
 759                }
 760                mount_unlock(mp);
 761        }
 762
 763        lck_rw_lock_shared(&mp->mnt_rwlock);
 764
 765        /* 
 766         * until we are granted the rwlock, it's possible for the mount point to
 767         * change state, so reevaluate before granting the vfs_busy
 768         */
 769        if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) {
 770                lck_rw_done(&mp->mnt_rwlock);
 771                goto restart;
 772        }
 773        return (0);
 774}
 775
 776/*
 777 * Free a busy filesystem.
 778 */
 779
 780void
 781vfs_unbusy(mount_t mp)
 782{
 783        lck_rw_done(&mp->mnt_rwlock);
 784}
 785
 786
 787
 788static void
 789vfs_rootmountfailed(mount_t mp) {
 790
 791        mount_list_lock();
 792        mp->mnt_vtable->vfc_refcount--;
 793        mount_list_unlock();
 794
 795        vfs_unbusy(mp);
 796
 797        mount_lock_destroy(mp);
 798
 799        FREE_ZONE(mp, sizeof(struct mount), M_MOUNT);
 800}
 801
 802/*
 803 * Lookup a filesystem type, and if found allocate and initialize
 804 * a mount structure for it.
 805 *
 806 * Devname is usually updated by mount(8) after booting.
 807 */
 808static mount_t
 809vfs_rootmountalloc_internal(struct vfstable *vfsp, const char *devname)
 810{
 811        mount_t mp;
 812
 813        mp = _MALLOC_ZONE((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
 814        bzero((char *)mp, (u_long)sizeof(struct mount));
 815
 816        /* Initialize the default IO constraints */
 817        mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
 818        mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
 819        mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
 820        mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
 821        mp->mnt_devblocksize = DEV_BSIZE;
 822
 823        mount_lock_init(mp);
 824        (void)vfs_busy(mp, LK_NOWAIT);
 825
 826        TAILQ_INIT(&mp->mnt_vnodelist);
 827        TAILQ_INIT(&mp->mnt_workerqueue);
 828        TAILQ_INIT(&mp->mnt_newvnodes);
 829
 830        mp->mnt_vtable = vfsp;
 831        mp->mnt_op = vfsp->vfc_vfsops;
 832        mp->mnt_flag = MNT_RDONLY | MNT_ROOTFS;
 833        mp->mnt_vnodecovered = NULLVP;
 834        //mp->mnt_stat.f_type = vfsp->vfc_typenum;
 835        mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 836
 837        mount_list_lock();
 838        vfsp->vfc_refcount++;
 839        mount_list_unlock();
 840
 841        strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
 842        mp->mnt_vfsstat.f_mntonname[0] = '/';
 843        (void) copystr((char *)devname, mp->mnt_vfsstat.f_mntfromname, MAXPATHLEN - 1, 0);
 844
 845        return (mp);
 846}
 847
 848errno_t
 849vfs_rootmountalloc(const char *fstypename, const char *devname, mount_t *mpp)
 850{
 851        struct vfstable *vfsp;
 852
 853        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 854                if (!strcmp(vfsp->vfc_name, fstypename))
 855                        break;
 856        if (vfsp == NULL)
 857                return (ENODEV);
 858
 859        *mpp = vfs_rootmountalloc_internal(vfsp, devname);
 860
 861        if (*mpp)
 862                return (0);
 863
 864        return (ENOMEM);
 865}
 866
 867
 868/*
 869 * Find an appropriate filesystem to use for the root. If a filesystem
 870 * has not been preselected, walk through the list of known filesystems
 871 * trying those that have mountroot routines, and try them until one
 872 * works or we have tried them all.
 873 */
 874extern int (*mountroot)(void);
 875
 876int
 877vfs_mountroot()
 878{
 879        struct vfstable *vfsp;
 880        struct vfs_context context;
 881        int     error;
 882        mount_t mp;
 883
 884        if (mountroot != NULL) {
 885                /*
 886                 * used for netboot which follows a different set of rules
 887                 */
 888                error = (*mountroot)();
 889                return (error);
 890        }
 891        if ((error = bdevvp(rootdev, &rootvp))) {
 892                printf("vfs_mountroot: can't setup bdevvp\n");
 893                return (error);
 894        }
 895        context.vc_proc = current_proc();
 896        context.vc_ucred = kauth_cred_get();
 897
 898        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 899                if (vfsp->vfc_mountroot == NULL)
 900                        continue;
 901
 902                mp = vfs_rootmountalloc_internal(vfsp, "root_device");
 903                mp->mnt_devvp = rootvp;
 904
 905                if ((error = (*vfsp->vfc_mountroot)(mp, rootvp, &context)) == 0) {
 906                        mp->mnt_devvp->v_specflags |= SI_MOUNTEDON;
 907
 908                        vfs_unbusy(mp);
 909
 910                        mount_list_add(mp);
 911
 912                        /*
 913                         *   cache the IO attributes for the underlying physical media...
 914                         *   an error return indicates the underlying driver doesn't
 915                         *   support all the queries necessary... however, reasonable
 916                         *   defaults will have been set, so no reason to bail or care
 917                         */
 918                        vfs_init_io_attributes(rootvp, mp);
 919                        /*
 920                         * get rid of iocount reference returned
 921                         * by bdevvp... it will have also taken
 922                         * a usecount reference which we want to keep
 923                         */
 924                        vnode_put(rootvp);
 925
 926                        return (0);
 927                }
 928                vfs_rootmountfailed(mp);
 929                
 930                if (error != EINVAL)
 931                        printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
 932        }
 933        return (ENODEV);
 934}
 935
 936/*
 937 * Lookup a mount point by filesystem identifier.
 938 */
 939extern mount_t vfs_getvfs_locked(fsid_t *);
 940
 941struct mount *
 942vfs_getvfs(fsid)
 943        fsid_t *fsid;
 944{
 945        return (mount_list_lookupby_fsid(fsid, 0, 0));
 946}
 947
 948struct mount *
 949vfs_getvfs_locked(fsid)
 950        fsid_t *fsid;
 951{
 952        return(mount_list_lookupby_fsid(fsid, 1, 0));
 953}
 954
 955struct mount *
 956vfs_getvfs_by_mntonname(u_char *path)
 957{
 958        mount_t retmp = (mount_t)0;
 959        mount_t mp;
 960
 961        mount_list_lock();
 962        TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 963                if (!strcmp(mp->mnt_vfsstat.f_mntonname, path)) {
 964                        retmp = mp;
 965                        goto out;
 966                }
 967        }
 968out:
 969        mount_list_unlock();
 970        return (retmp);
 971}
 972
 973/* generation number for creation of new fsids */
 974u_short mntid_gen = 0;
 975/*
 976 * Get a new unique fsid
 977 */
 978void
 979vfs_getnewfsid(mp)
 980        struct mount *mp;
 981{
 982
 983        fsid_t tfsid;
 984        int mtype;
 985        mount_t nmp;
 986
 987        mount_list_lock();
 988
 989        /* generate a new fsid */
 990        mtype = mp->mnt_vtable->vfc_typenum;
 991        if (++mntid_gen == 0)
 992                mntid_gen++;
 993        tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen);
 994        tfsid.val[1] = mtype;
 995
 996        TAILQ_FOREACH(nmp, &mountlist, mnt_list) {
 997                while (vfs_getvfs_locked(&tfsid)) {
 998                        if (++mntid_gen == 0)
 999                                mntid_gen++;
1000                        tfsid.val[0] = makedev(nblkdev + mtype, mntid_gen);
1001                }
1002        }
1003        mp->mnt_vfsstat.f_fsid.val[0] = tfsid.val[0];
1004        mp->mnt_vfsstat.f_fsid.val[1] = tfsid.val[1];
1005        mount_list_unlock();
1006}
1007
1008/*
1009 * Routines having to do with the management of the vnode table.
1010 */
/* Operations vector that vclean() installs in v_op once a vnode has been cleaned out. */
1011extern int (**dead_vnodeop_p)(void *);
/*
 * freevnodes is incremented by vnode_list_add() each time a vnode is
 * linked onto vnode_free_list.
 * NOTE(review): numvnodes is presumably the total number of vnodes
 * allocated; it is not referenced in this part of the file -- confirm.
 */
1012long numvnodes, freevnodes;
/* NOTE(review): presumably a count of inactive vnodes; not referenced in this part of the file -- confirm. */
1013long inactivevnodes;
1014
1015
1016/*
1017 * Move a vnode from one mount queue to another.
1018 */
1019static void
1020insmntque(vnode_t vp, mount_t mp)
1021{
1022        mount_t lmp;
1023        /*
1024         * Delete from old mount point vnode list, if on one.
1025         */
1026        if ( (lmp = vp->v_mount) != NULL && lmp != dead_mountp) {
1027                if ((vp->v_lflag & VNAMED_MOUNT) == 0)
1028                        panic("insmntque: vp not in mount vnode list");
1029                vp->v_lflag &= ~VNAMED_MOUNT;
1030
1031                mount_lock(lmp);
1032
1033                mount_drop(lmp, 1);
1034
1035                if (vp->v_mntvnodes.tqe_next == NULL) {
1036                        if (TAILQ_LAST(&lmp->mnt_vnodelist, vnodelst) == vp)
1037                                TAILQ_REMOVE(&lmp->mnt_vnodelist, vp, v_mntvnodes);
1038                        else if (TAILQ_LAST(&lmp->mnt_newvnodes, vnodelst) == vp)
1039                                TAILQ_REMOVE(&lmp->mnt_newvnodes, vp, v_mntvnodes);
1040                        else if (TAILQ_LAST(&lmp->mnt_workerqueue, vnodelst) == vp)
1041                                TAILQ_REMOVE(&lmp->mnt_workerqueue, vp, v_mntvnodes);
1042                 } else {
1043                        vp->v_mntvnodes.tqe_next->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_prev;
1044                        *vp->v_mntvnodes.tqe_prev = vp->v_mntvnodes.tqe_next;
1045                }       
1046                vp->v_mntvnodes.tqe_next = 0;
1047                vp->v_mntvnodes.tqe_prev = 0;
1048                mount_unlock(lmp);
1049                return;
1050        }
1051
1052        /*
1053         * Insert into list of vnodes for the new mount point, if available.
1054         */
1055        if ((vp->v_mount = mp) != NULL) {
1056                mount_lock(mp);
1057                if ((vp->v_mntvnodes.tqe_next != 0) && (vp->v_mntvnodes.tqe_prev != 0))
1058                        panic("vp already in mount list");
1059                if (mp->mnt_lflag & MNT_LITER)
1060                        TAILQ_INSERT_HEAD(&mp->mnt_newvnodes, vp, v_mntvnodes);
1061                else
1062                        TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
1063                if (vp->v_lflag & VNAMED_MOUNT)
1064                        panic("insmntque: vp already in mount vnode list");
1065                if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
1066                        panic("insmntque: vp on the free list\n");
1067                vp->v_lflag |= VNAMED_MOUNT;
1068                mount_ref(mp, 1);
1069                mount_unlock(mp);
1070        }
1071}
1072
1073
1074/*
1075 * Create a vnode for a block device.
1076 * Used for root filesystem, argdev, and swap areas.
1077 * Also used for memory file system special devices.
1078 */
1079int
1080bdevvp(dev_t dev, vnode_t *vpp)
1081{
1082        vnode_t nvp;
1083        int     error;
1084        struct vnode_fsparam vfsp;
1085        struct vfs_context context;
1086
1087        if (dev == NODEV) {
1088                *vpp = NULLVP;
1089                return (ENODEV);
1090        }
1091
1092        context.vc_proc = current_proc();
1093        context.vc_ucred = FSCRED;
1094
1095        vfsp.vnfs_mp = (struct mount *)0;
1096        vfsp.vnfs_vtype = VBLK;
1097        vfsp.vnfs_str = "bdevvp";
1098        vfsp.vnfs_dvp = 0;
1099        vfsp.vnfs_fsnode = 0;
1100        vfsp.vnfs_cnp = 0;
1101        vfsp.vnfs_vops = spec_vnodeop_p;
1102        vfsp.vnfs_rdev = dev;
1103        vfsp.vnfs_filesize = 0;
1104
1105        vfsp.vnfs_flags = VNFS_NOCACHE | VNFS_CANTCACHE;
1106
1107        vfsp.vnfs_marksystem = 0;
1108        vfsp.vnfs_markroot = 0;
1109
1110        if ( (error = vnode_create(VNCREATE_FLAVOR, VCREATESIZE, &vfsp, &nvp)) ) {
1111                *vpp = NULLVP;
1112                return (error);
1113        }
1114        if ( (error = vnode_ref(nvp)) ) {
1115                panic("bdevvp failed: vnode_ref");
1116                return (error);
1117        }
1118        if ( (error = VNOP_FSYNC(nvp, MNT_WAIT, &context)) ) {
1119                panic("bdevvp failed: fsync");
1120                return (error);
1121        }
1122        if ( (error = buf_invalidateblks(nvp, BUF_WRITE_DATA, 0, 0)) ) {
1123                panic("bdevvp failed: invalidateblks");
1124                return (error);
1125        }
1126        if ( (error = VNOP_OPEN(nvp, FREAD, &context)) ) {
1127                panic("bdevvp failed: open");
1128                return (error);
1129        }
1130        *vpp = nvp;
1131
1132        return (0);
1133}
1134
1135/*
1136 * Check to see if the new vnode represents a special device
1137 * for which we already have a vnode (either because of
1138 * bdevvp() or because of a different vnode representing
1139 * the same block device). If such an alias exists, deallocate
1140 * the existing contents and return the aliased vnode. The
1141 * caller is responsible for filling it with its new contents.
1142 */
1143static vnode_t
1144checkalias(nvp, nvp_rdev)
1145        register struct vnode *nvp;
1146        dev_t nvp_rdev;
1147{
1148        struct vnode *vp;
1149        struct vnode **vpp;
1150        int vid = 0;
1151
1152        vpp = &speclisth[SPECHASH(nvp_rdev)];
1153loop:
1154        SPECHASH_LOCK();
1155
1156        for (vp = *vpp; vp; vp = vp->v_specnext) {
1157                if (nvp_rdev == vp->v_rdev && nvp->v_type == vp->v_type) {
1158                        vid = vp->v_id;
1159                        break;
1160                }
1161        }
1162        SPECHASH_UNLOCK();
1163
1164        if (vp) {
1165                if (vnode_getwithvid(vp,vid)) {
1166                        goto loop;
1167                }
1168                /*
1169                 * Termination state is checked in vnode_getwithvid
1170                 */
1171                vnode_lock(vp);
1172
1173                /*
1174                 * Alias, but not in use, so flush it out.
1175                 */
1176                if ((vp->v_iocount == 1) && (vp->v_usecount == 0)) {
1177                        vnode_reclaim_internal(vp, 1, 0);
1178                        vnode_unlock(vp);
1179                        vnode_put(vp);
1180                        goto loop;
1181                }
1182        }
1183        if (vp == NULL || vp->v_tag != VT_NON) {
1184                MALLOC_ZONE(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo),
1185                            M_SPECINFO, M_WAITOK);
1186                bzero(nvp->v_specinfo, sizeof(struct specinfo));
1187                nvp->v_rdev = nvp_rdev;
1188                nvp->v_specflags = 0;
1189                nvp->v_speclastr = -1;
1190
1191                SPECHASH_LOCK();
1192                nvp->v_hashchain = vpp;
1193                nvp->v_specnext = *vpp;
1194                *vpp = nvp;
1195                SPECHASH_UNLOCK();
1196
1197                if (vp != NULLVP) {
1198                        nvp->v_flag |= VALIASED;
1199                        vp->v_flag |= VALIASED;
1200                        vnode_unlock(vp);
1201                        vnode_put(vp);
1202                }
1203                return (NULLVP);
1204        }
1205        return (vp);
1206}
1207
1208
1209/*
1210 * Get a reference on a particular vnode and lock it if requested.
1211 * If the vnode was on the inactive list, remove it from the list.
1212 * If the vnode was on the free list, remove it from the list and
1213 * move it to inactive list as needed.
1214 * The vnode lock bit is set if the vnode is being eliminated in
1215 * vgone. The process is awakened when the transition is completed,
1216 * and an error returned to indicate that the vnode is no longer
1217 * usable (possibly having been changed to a new file system type).
1218 */
1219static int
1220vget_internal(vnode_t vp, int vid, int vflags)
1221{
1222        int error = 0;
1223        u_long vpid;
1224
1225        vnode_lock(vp);
1226
1227        if (vflags & VNODE_WITHID)
1228                vpid = vid;
1229        else
1230                vpid = vp->v_id;    // save off the original v_id
1231
1232        if ((vflags & VNODE_WRITEABLE) && (vp->v_writecount == 0))
1233                /*
1234                 * vnode to be returned only if it has writers opened 
1235                 */
1236                error = EINVAL;
1237        else
1238                error = vnode_getiocount(vp, 1, vpid, vflags);
1239
1240        vnode_unlock(vp);
1241
1242        return (error);
1243}
1244
1245int
1246vnode_ref(vnode_t vp)
1247{
1248
1249        return (vnode_ref_ext(vp, 0));
1250}
1251
1252int
1253vnode_ref_ext(vnode_t vp, int fmode)
1254{
1255        int     error = 0;
1256
1257        vnode_lock(vp);
1258
1259        /*
1260         * once all the current call sites have been fixed to insure they have
1261         * taken an iocount, we can toughen this assert up and insist that the
1262         * iocount is non-zero... a non-zero usecount doesn't insure correctness
1263         */
1264        if (vp->v_iocount <= 0 && vp->v_usecount <= 0) 
1265                panic("vnode_ref_ext: vp %x has no valid reference %d, %d", vp, vp->v_iocount, vp->v_usecount);
1266
1267        /*
1268         * if you are the owner of drain/termination, can acquire usecount
1269         */
1270        if ((vp->v_lflag & (VL_DRAIN | VL_TERMINATE | VL_DEAD))) {
1271                if (vp->v_owner != current_thread()) {
1272                        error = ENOENT;
1273                        goto out;
1274                }
1275        }
1276        vp->v_usecount++;
1277
1278        if (fmode & FWRITE) {
1279                if (++vp->v_writecount <= 0)
1280                        panic("vnode_ref_ext: v_writecount");
1281        }
1282        if (fmode & O_EVTONLY) {
1283                if (++vp->v_kusecount <= 0)
1284                        panic("vnode_ref_ext: v_kusecount");
1285        }
1286out:
1287        vnode_unlock(vp);
1288
1289        return (error);
1290}
1291
1292
1293/*
1294 * put the vnode on appropriate free list.
1295 * called with vnode LOCKED
1296 */
1297static void
1298vnode_list_add(vnode_t vp)
1299{
1300
1301        /*
1302         * if it is already on a list or non zero references return 
1303         */
1304        if (VONLIST(vp) || (vp->v_usecount != 0) || (vp->v_iocount != 0))
1305                return;
1306        vnode_list_lock();
1307
1308        /*
1309         * insert at tail of LRU list or at head if VAGE or VL_DEAD is set
1310         */
1311        if ((vp->v_flag & VAGE) || (vp->v_lflag & VL_DEAD)) {
1312                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1313                vp->v_flag &= ~VAGE;
1314        } else {
1315                TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1316        }
1317        freevnodes++;
1318
1319        vnode_list_unlock();
1320}
1321
1322/*
1323 * remove the vnode from appropriate free list.
1324 */
1325static void
1326vnode_list_remove(vnode_t vp)
1327{
1328        /*
1329         * we want to avoid taking the list lock
1330         * in the case where we're not on the free
1331         * list... this will be true for most
1332         * directories and any currently in use files
1333         *
1334         * we're guaranteed that we can't go from
1335         * the not-on-list state to the on-list 
1336         * state since we hold the vnode lock...
1337         * all calls to vnode_list_add are done
1338         * under the vnode lock... so we can
1339         * check for that condition (the prevelant one)
1340         * without taking the list lock
1341         */
1342        if (VONLIST(vp)) {
1343                vnode_list_lock();
1344                /*
1345                 * however, we're not guaranteed that
1346                 * we won't go from the on-list state
1347                 * to the non-on-list state until we
1348                 * hold the vnode_list_lock... this 
1349                 * is due to new_vnode removing vnodes
1350                 * from the free list uder the list_lock
1351                 * w/o the vnode lock... so we need to
1352                 * check again whether we're currently
1353                 * on the free list
1354                 */
1355                if (VONLIST(vp)) {
1356                        VREMFREE("vnode_list_remove", vp);
1357                        VLISTNONE(vp);
1358                }
1359                vnode_list_unlock();
1360        }
1361}
1362
1363
1364void
1365vnode_rele(vnode_t vp)
1366{
1367        vnode_rele_internal(vp, 0, 0, 0);
1368}
1369
1370
1371void
1372vnode_rele_ext(vnode_t vp, int fmode, int dont_reenter)
1373{
1374        vnode_rele_internal(vp, fmode, dont_reenter, 0);
1375}
1376
1377
1378void
1379vnode_rele_internal(vnode_t vp, int fmode, int dont_reenter, int locked)
1380{
1381        struct vfs_context context;
1382
1383        if ( !locked)
1384                vnode_lock(vp);
1385
1386        if (--vp->v_usecount < 0)
1387                panic("vnode_rele_ext: vp %x usecount -ve : %d", vp,  vp->v_usecount);
1388
1389        if (fmode & FWRITE) {
1390                if (--vp->v_writecount < 0)
1391                        panic("vnode_rele_ext: vp %x writecount -ve : %d", vp,  vp->v_writecount);
1392        }
1393        if (fmode & O_EVTONLY) {
1394                if (--vp->v_kusecount < 0)
1395                        panic("vnode_rele_ext: vp %x kusecount -ve : %d", vp,  vp->v_kusecount);
1396        }
1397        if ((vp->v_iocount > 0) || (vp->v_usecount > 0)) {
1398                /*
1399                 * vnode is still busy... if we're the last
1400                 * usecount, mark for a future call to VNOP_INACTIVE
1401                 * when the iocount finally drops to 0
1402                 */
1403                if (vp->v_usecount == 0) {
1404                        vp->v_lflag |= VL_NEEDINACTIVE;
1405                        vp->v_flag  &= ~(VNOCACHE_DATA | VRAOFF);
1406                }
1407                if ( !locked)
1408                        vnode_unlock(vp);
1409                return;
1410        }
1411        vp->v_flag  &= ~(VNOCACHE_DATA | VRAOFF);
1412
1413        if ( (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) || dont_reenter) {
1414                /*
1415                 * vnode is being cleaned, or
1416                 * we've requested that we don't reenter
1417                 * the filesystem on this release... in
1418                 * this case, we'll mark the vnode aged
1419                 * if it's been marked for termination
1420                 */
1421                if (dont_reenter) {
1422                        if ( !(vp->v_lflag & (VL_TERMINATE | VL_DEAD | VL_MARKTERM)) )
1423                                vp->v_lflag |= VL_NEEDINACTIVE;
1424                        vp->v_flag |= VAGE;
1425                }
1426                vnode_list_add(vp);
1427                if ( !locked)
1428                        vnode_unlock(vp);
1429                return;
1430        }
1431        /*
1432         * at this point both the iocount and usecount
1433         * are zero
1434         * pick up an iocount so that we can call
1435         * VNOP_INACTIVE with the vnode lock unheld
1436         */
1437        vp->v_iocount++;
1438#ifdef JOE_DEBUG
1439        record_vp(vp, 1);
1440#endif
1441        vp->v_lflag &= ~VL_NEEDINACTIVE;
1442        vnode_unlock(vp);
1443
1444        context.vc_proc = current_proc();
1445        context.vc_ucred = kauth_cred_get();
1446        VNOP_INACTIVE(vp, &context);
1447
1448        vnode_lock(vp);
1449        /*
1450         * because we dropped the vnode lock to call VNOP_INACTIVE
1451         * the state of the vnode may have changed... we may have
1452         * picked up an iocount, usecount or the MARKTERM may have
1453         * been set... we need to reevaluate the reference counts
1454         * to determine if we can call vnode_reclaim_internal at
1455         * this point... if the reference counts are up, we'll pick
1456         * up the MARKTERM state when they get subsequently dropped
1457         */
1458        if ( (vp->v_iocount == 1) && (vp->v_usecount == 0) &&
1459             ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)) {
1460                struct  uthread *ut;
1461
1462                ut = get_bsdthread_info(current_thread());
1463                
1464                if (ut->uu_defer_reclaims) {
1465                        vp->v_defer_reclaimlist = ut->uu_vreclaims;
1466                                ut->uu_vreclaims = vp;
1467                        goto defer_reclaim;
1468                }
1469                vnode_reclaim_internal(vp, 1, 0);
1470        }
1471        vnode_dropiocount(vp, 1);
1472        vnode_list_add(vp);
1473defer_reclaim:
1474        if ( !locked)
1475                vnode_unlock(vp);
1476        return;
1477}
1478
1479/*
1480 * Remove any vnodes in the vnode table belonging to mount point mp.
1481 *
1482 * If MNT_NOFORCE is specified, there should not be any active ones,
1483 * return error if any are found (nb: this is a user error, not a
1484 * system error). If MNT_FORCE is specified, detach any active vnodes
1485 * that are found.
1486 */
1487#if DIAGNOSTIC
1488int busyprt = 0;        /* print out busy vnodes */
1489#if 0
1490struct ctldebug debug1 = { "busyprt", &busyprt };
1491#endif /* 0 */
1492#endif
1493
/*
 * vflush: walk the vnodes of mount point mp and reclaim them.
 *
 *   mp      mount whose vnodes are to be flushed
 *   skipvp  a vnode to leave alone (compared by pointer), may be NULL
 *   flags   FORCECLOSE, SKIPSYSTEM, SKIPSWAP, SKIPROOT and WRITECLOSE
 *           are examined here
 *
 * Returns EBUSY when FORCECLOSE is not set and either the preflight
 * check or the walk itself found vnodes that are still in use;
 * returns 0 otherwise (including the case where
 * vnode_iterate_prepare() reports there is nothing to do).
 *
 * Locking: the mount lock is held while the worker queue is
 * manipulated and is dropped before each vnode is locked; every path
 * that goes round the loop again re-takes the mount lock first.
 */
1494int
1495vflush(mp, skipvp, flags)
1496        struct mount *mp;
1497        struct vnode *skipvp;
1498        int flags;
1499{
1500        struct proc *p = current_proc();
1501        struct vnode *vp;
1502        int busy = 0;
1503        int reclaimed = 0;
1504        int vid, retval;
1505
1506        mount_lock(mp);
1507        vnode_iterate_setup(mp);
1508        /*
1509         * On regular unmounts(not forced) do a
1510         * quick check for vnodes to be in use. This
1511         * preserves the caching of vnodes. automounter
1512         * tries unmounting every so often to see whether
1513         * it is still busy or not.
1514         */
1515        if ((flags & FORCECLOSE)==0) {
1516                if (vnode_umount_preflight(mp, skipvp, flags)) {
1517                        vnode_iterate_clear(mp);
1518                        mount_unlock(mp);
1519                        return(EBUSY);
1520                }
1521        }
1522loop:
1523        /* if it returns 0 then there is nothing to do */
1524        retval = vnode_iterate_prepare(mp);
1525
1526        if (retval == 0)  {
1527                vnode_iterate_clear(mp);
1528                mount_unlock(mp);
1529                return(retval);
1530        }
1531
1532    /* iterate over all the vnodes */
1533    while (!TAILQ_EMPTY(&mp->mnt_workerqueue)) {
        /* move the vnode from the worker queue back to the mount's vnode list before looking at it */
1534        vp = TAILQ_FIRST(&mp->mnt_workerqueue);
1535        TAILQ_REMOVE(&mp->mnt_workerqueue, vp, v_mntvnodes);
1536        TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
        /* this skip happens with the mount lock still held, unlike the later ones */
1537        if ( (vp->v_mount != mp) || (vp == skipvp)) {
1538            continue;
1539        }
        /* remember the identity so a recycle can be detected once the vnode lock is held */
1540        vid = vp->v_id;
1541        mount_unlock(mp);
1542                vnode_lock(vp);
1543
                /* skip vnodes whose id changed in the meantime or that are already dead / terminating */
1544                if ((vp->v_id != vid) || ((vp->v_lflag & (VL_DEAD | VL_TERMINATE)))) {
1545                                vnode_unlock(vp);
1546                                mount_lock(mp);
1547                                continue;
1548                }
1549
1550                /*
1551                 * If requested, skip over vnodes marked VSYSTEM.
1552                 * Skip over all vnodes marked VNOFLUSH.
                 * NOTE(review): as written, VNOFLUSH is only honored
                 * when SKIPSYSTEM is set in flags.
1553                 */
1554                if ((flags & SKIPSYSTEM) && ((vp->v_flag & VSYSTEM) ||
1555                    (vp->v_flag & VNOFLUSH))) {
1556                        vnode_unlock(vp);
1557                        mount_lock(mp);
1558                        continue;
1559                }
1560                /*
1561                 * If requested, skip over vnodes marked VSWAP.
1562                 */
1563                if ((flags & SKIPSWAP) && (vp->v_flag & VSWAP)) {
1564                        vnode_unlock(vp);
1565                        mount_lock(mp);
1566                        continue;
1567                }
1568                /*
1569                 * If requested, skip over vnodes marked VROOT.
1570                 */
1571                if ((flags & SKIPROOT) && (vp->v_flag & VROOT)) {
1572                        vnode_unlock(vp);
1573                        mount_lock(mp);
1574                        continue;
1575                }
1576                /*
1577                 * If WRITECLOSE is set, only flush out regular file
1578                 * vnodes open for writing.
1579                 */
1580                if ((flags & WRITECLOSE) &&
1581                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
1582                        vnode_unlock(vp);
1583                        mount_lock(mp);
1584                        continue;
1585                }
1586                /*
1587                 * If the real usecount is 0, all we need to do is clear
1588                 * out the vnode data structures and we are done.
                 * "Real" excludes v_kusecount, the count taken for
                 * O_EVTONLY references in vnode_ref_ext().
1589                 */
1590                if (((vp->v_usecount == 0) ||
1591                    ((vp->v_usecount - vp->v_kusecount) == 0))) {
1592                        vp->v_iocount++;        /* so that drain waits for * other iocounts */
1593#ifdef JOE_DEBUG
1594                        record_vp(vp, 1);
1595#endif
1596                        vnode_reclaim_internal(vp, 1, 0);
1597                        vnode_dropiocount(vp, 1);
1598                        vnode_list_add(vp);
1599
1600                        vnode_unlock(vp);
1601                        reclaimed++;
1602                        mount_lock(mp);
1603                        continue;
1604                }
1605                /*
1606                 * If FORCECLOSE is set, forcibly close the vnode.
1607                 * For block or character devices, revert to an
1608                 * anonymous device. For all other files, just kill them.
1609                 */
1610                if (flags & FORCECLOSE) {
1611                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
1612                                vp->v_iocount++;        /* so that drain waits * for other iocounts */
1613#ifdef JOE_DEBUG
1614                                record_vp(vp, 1);
1615#endif
1616                                vnode_reclaim_internal(vp, 1, 0);
1617                                vnode_dropiocount(vp, 1);
1618                                vnode_list_add(vp);
1619                                vnode_unlock(vp);
1620                        } else {
                                /*
                                 * device vnode: clean it without DOCLOSE, then undo
                                 * the "dead" state vclean() leaves behind by clearing
                                 * VL_DEAD and pointing v_op back at the spec operations
                                 */
1621                                vclean(vp, 0, p);
1622                                vp->v_lflag &= ~VL_DEAD;
1623                                vp->v_op = spec_vnodeop_p;
1624                                vnode_unlock(vp);
1625                        }
1626                        mount_lock(mp);
1627                        continue;
1628                }
1629#if DIAGNOSTIC
1630                if (busyprt)
1631                        vprint("vflush: busy vnode", vp);
1632#endif
                /* in use and not forced: leave it alone and count it as busy */
1633                vnode_unlock(vp);
1634                mount_lock(mp);
1635                busy++;
1636        }
1637
1638        /* At this point the worker queue is completed */
        /*
         * non-forced flush that found busy vnodes but also reclaimed
         * some: reset the counters and make another pass
         */
1639        if (busy && ((flags & FORCECLOSE)==0) && reclaimed) {
1640                busy = 0;
1641                reclaimed = 0;
1642                (void)vnode_iterate_reloadq(mp);
1643                /* returned with mount lock held */
1644                goto loop;
1645        }
1646
1647        /* if new vnodes were created in between retry the reclaim */
1648        if ( vnode_iterate_reloadq(mp) != 0) {
1649                if (!(busy && ((flags & FORCECLOSE)==0)))
1650                        goto loop;
1651        }
1652        vnode_iterate_clear(mp);
1653        mount_unlock(mp);
1654
1655        if (busy && ((flags & FORCECLOSE)==0))
1656                return (EBUSY);
1657        return (0);
1658}
1659
1660int num_recycledvnodes=0;
1661/*
1662 * Disassociate the underlying file system from a vnode.
1663 * The vnode lock is held on entry.
1664 */
1665static void
1666vclean(vnode_t vp, int flags, proc_t p)
1667{
1668        struct vfs_context context;
1669        int active;
1670        int need_inactive;
1671        int already_terminating;
1672        kauth_cred_t ucred = NULL;
1673
1674        context.vc_proc = p;
1675        context.vc_ucred = kauth_cred_get();
1676
1677        /*
1678         * Check to see if the vnode is in use.
1679         * If so we have to reference it before we clean it out
1680         * so that its count cannot fall to zero and generate a
1681         * race against ourselves to recycle it.
1682         */
1683        active = vp->v_usecount;
1684
1685        /*
1686         * just in case we missed sending a needed
1687         * VNOP_INACTIVE, we'll do it now
1688         */
1689        need_inactive = (vp->v_lflag & VL_NEEDINACTIVE);
1690
1691        vp->v_lflag &= ~VL_NEEDINACTIVE;
1692
1693        /*
1694         * Prevent the vnode from being recycled or
1695         * brought into use while we clean it out.
1696         */
1697        already_terminating = (vp->v_lflag & VL_TERMINATE);
1698
1699        vp->v_lflag |= VL_TERMINATE;
1700
1701        /*
1702         * remove the vnode from any mount list
1703         * it might be on...
1704         */
1705        insmntque(vp, (struct mount *)0);
1706
1707        ucred = vp->v_cred;
1708        vp->v_cred = NULL;
1709
1710        vnode_unlock(vp);
1711
1712        if (ucred)
1713                kauth_cred_rele(ucred);
1714
1715        OSAddAtomic(1, &num_recycledvnodes);
1716        /*
1717         * purge from the name cache as early as possible...
1718         */
1719        cache_purge(vp);
1720
1721        if (active && (flags & DOCLOSE))
1722                VNOP_CLOSE(vp, IO_NDELAY, &context);
1723
1724        /*
1725         * Clean out any buffers associated with the vnode.
1726         */
1727        if (flags & DOCLOSE) {
1728#if NFSCLIENT
1729                if (vp->v_tag == VT_NFS)
1730                        nfs_vinvalbuf(vp, V_SAVE, NOCRED, p, 0);
1731                else
1732#endif
1733                {
1734                        VNOP_FSYNC(vp, MNT_WAIT, &context);
1735                        buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
1736                }
1737                if (UBCINFOEXISTS(vp))
1738                        /*
1739                         * Clean the pages in VM.
1740                         */
1741                        (void)ubc_sync_range(vp, (off_t)0, ubc_getsize(vp), UBC_PUSHALL);
1742        }
1743        if (UBCINFOEXISTS(vp))
1744                cluster_release(vp->v_ubcinfo);
1745
1746        if (active || need_inactive) 
1747                VNOP_INACTIVE(vp, &context);
1748
1749        /* Destroy ubc named reference */
1750        ubc_destroy_named(vp);
1751
1752        /*
1753         * Reclaim the vnode.
1754         */
1755        if (VNOP_RECLAIM(vp, &context))
1756                panic("vclean: cannot reclaim");
1757        
1758        // make sure the name & parent ptrs get cleaned out!
1759        vnode_update_identity(vp, NULLVP, NULL, 0, 0, VNODE_UPDATE_PARENT | VNODE_UPDATE_NAME);
1760
1761        vnode_lock(vp);
1762
1763        vp->v_mount = dead_mountp;
1764        vp->v_op = dead_vnodeop_p;
1765        vp->v_tag = VT_NON;
1766        vp->v_data = NULL;
1767
1768        vp->v_lflag |= VL_DEAD;
1769
1770        if (already_terminating == 0) {
1771                vp->v_lflag &= ~VL_TERMINATE;
1772                /*
1773                 * Done with purge, notify sleepers of the grim news.
1774                 */
1775                if (vp->v_lflag & VL_TERMWANT) {
1776                        vp->v_lflag &= ~VL_TERMWANT;
1777                        wakeup(&vp->v_lflag);
1778                }
1779        }
1780}
1781
1782/*
1783 * Eliminate all activity associated with  the requested vnode
1784 * and with all vnodes aliased to the requested vnode.
1785 */
1786int
1787vn_revoke(vnode_t vp, int flags, __unused vfs_context_t a_context)
1788{
1789        struct vnode *vq;
1790        int vid;
1791
1792#if DIAGNOSTIC
1793        if ((flags & REVOKEALL) == 0)
1794                panic("vnop_revoke");
1795#endif
1796
1797        if (vp->v_flag & VALIASED) {
1798                /*
1799                 * If a vgone (or vclean) is already in progress,
1800                 * wait until it is done and return.
1801                 */
1802                vnode_lock(vp);
1803                if (vp->v_lflag & VL_TERMINATE) {
1804                        vnode_unlock(vp);
1805                        return(ENOENT);
1806                }
1807                vnode_unlock(vp);
1808                /*
1809                 * Ensure that vp will not be vgone'd while we
1810                 * are eliminating its aliases.
1811                 */
1812                SPECHASH_LOCK();
1813                while (vp->v_flag & VALIASED) {
1814                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1815                                if (vq->v_rdev != vp->v_rdev ||
1816                                    vq->v_type != vp->v_type || vp == vq)
1817                                        continue;
1818                                vid = vq->v_id;
1819                                SPECHASH_UNLOCK();
1820                                if (vnode_getwithvid(vq,vid)){
1821                                        SPECHASH_LOCK();        
1822                                        break;
1823                                }
1824                                vnode_reclaim_internal(vq, 0, 0);
1825                                vnode_put(vq);
1826                                SPECHASH_LOCK();
1827                                break;
1828                        }
1829                }
1830                SPECHASH_UNLOCK();
1831        }
1832        vnode_reclaim_internal(vp, 0, 0);
1833
1834        return (0);
1835}
1836
1837/*
1838 * Recycle an unused vnode to the front of the free list.
1839 * Release the passed interlock if the vnode will be recycled.
1840 */
1841int
1842vnode_recycle(vp)
1843        struct vnode *vp;
1844{
1845        vnode_lock(vp);
1846
1847        if (vp->v_iocount || vp->v_usecount) {
1848                vp->v_lflag |= VL_MARKTERM;
1849                vnode_unlock(vp);
1850                return(0);
1851        } 
1852        vnode_reclaim_internal(vp, 1, 0);
1853        vnode_unlock(vp);
1854
1855        return (1);
1856}
1857
1858static int
1859vnode_reload(vnode_t vp)
1860{
1861        vnode_lock(vp);
1862
1863        if ((vp->v_iocount > 1) || vp->v_usecount) {
1864                vnode_unlock(vp);
1865                return(0);
1866        } 
1867        if (vp->v_iocount <= 0)
1868                panic("vnode_reload with no iocount %d", vp->v_iocount);
1869
1870        /* mark for release when iocount is dopped */
1871        vp->v_lflag |= VL_MARKTERM;
1872        vnode_unlock(vp);
1873
1874        return (1);
1875}
1876
1877
1878static void
1879vgone(vnode_t vp)
1880{
1881        struct vnode *vq;
1882        struct vnode *vx;
1883
1884        /*
1885         * Clean out the filesystem specific data.
1886         * vclean also takes care of removing the
1887         * vnode from any mount list it might be on
1888         */
1889        vclean(vp, DOCLOSE, current_proc());
1890
1891        /*
1892         * If special device, remove it from special device alias list
1893         * if it is on one.
1894         */
1895        if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
1896                        SPECHASH_LOCK();
1897                        if (*vp->v_hashchain == vp) {
1898                                *vp->v_hashchain = vp->v_specnext;
1899                        } else {
1900                                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1901                                        if (vq->v_specnext != vp)
1902                                                continue;
1903                                        vq->v_specnext = vp->v_specnext;
1904                                        break;
1905                                }
1906                        if (vq == NULL)
1907                                panic("missing bdev");
1908                        }
1909                        if (vp->v_flag & VALIASED) {
1910                                vx = NULL;
1911                                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1912                                        if (vq->v_rdev != vp->v_rdev ||
1913                                        vq->v_type != vp->v_type)
1914                                                continue;
1915                                        if (vx)
1916                                                break;
1917                                        vx = vq;
1918                                }
1919                                if (vx == NULL)
1920                                        panic("missing alias");
1921                                if (vq == NULL)
1922                                        vx->v_flag &= ~VALIASED;
1923                                vp->v_flag &= ~VALIASED;
1924                        }
1925                        SPECHASH_UNLOCK();
1926                        {
1927                        struct specinfo *tmp = vp->v_specinfo;
1928                        vp->v_specinfo = NULL;
1929                        FREE_ZONE((void *)tmp, sizeof(struct specinfo), M_SPECINFO);
1930                        }
1931        }
1932}
1933
1934/*
1935 * Lookup a vnode by device number.
1936 */
1937int
1938check_mountedon(dev_t dev, enum vtype type, int  *errorp)
1939{
1940        vnode_t vp;
1941        int rc = 0;
1942        int vid;
1943
1944loop:
1945        SPECHASH_LOCK();
1946        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1947                if (dev != vp->v_rdev || type != vp->v_type)
1948                        continue;
1949                vid = vp->v_id;
1950                SPECHASH_UNLOCK();
1951                if (vnode_getwithvid(vp,vid))
1952                        goto loop;
1953                vnode_lock(vp);
1954                if ((vp->v_usecount > 0) || (vp->v_iocount > 1)) {
1955                        vnode_unlock(vp);
1956                        if ((*errorp = vfs_mountedon(vp)) != 0)
1957                                rc = 1;
1958                } else
1959                        vnode_unlock(vp);
1960                vnode_put(vp);
1961                return(rc);
1962        }
1963        SPECHASH_UNLOCK();
1964        return (0);
1965}
1966
1967/*
1968 * Calculate the total number of references to a special device.
1969 */
1970int
1971vcount(vnode_t vp)
1972{
1973        vnode_t vq, vnext;
1974        int count;
1975        int vid;
1976
1977loop:
1978        if ((vp->v_flag & VALIASED) == 0)
1979                return (vp->v_usecount - vp->v_kusecount);
1980
1981        SPECHASH_LOCK();
1982        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1983                vnext = vq->v_specnext;
1984                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1985                        continue;
1986                vid = vq->v_id;
1987                SPECHASH_UNLOCK();
1988
1989                if (vnode_getwithvid(vq, vid)) {
1990                        goto loop;
1991                }
1992                /*
1993                 * Alias, but not in use, so flush it out.
1994                 */
1995                vnode_lock(vq);
1996                if ((vq->v_usecount == 0) && (vq->v_iocount == 1)  && vq != vp) {
1997                        vnode_reclaim_internal(vq, 1, 0);
1998                        vnode_unlock(vq);
1999                        vnode_put(vq);
2000                        goto loop;
2001                }
2002                count += (vq->v_usecount - vq->v_kusecount);
2003                vnode_unlock(vq);
2004                vnode_put(vq);  
2005
2006                SPECHASH_LOCK();
2007        }
2008        SPECHASH_UNLOCK();
2009
2010        return (count);
2011}
2012
2013int     prtactive = 0;          /* 1 => print out reclaim of active vnodes */
2014
2015/*
2016 * Print out a description of a vnode.
2017 */
2018static char *typename[] =
2019   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
2020
2021void
2022vprint(const char *label, struct vnode *vp)
2023{
2024        char sbuf[64];
2025
2026        if (label != NULL)
2027                printf("%s: ", label);
2028        printf("type %s, usecount %d, writecount %d",
2029               typename[vp->v_type], vp->v_usecount, vp->v_writecount);
2030        sbuf[0] = '\0';
2031        if (vp->v_flag & VROOT)
2032                strcat(sbuf, "|VROOT");
2033        if (vp->v_flag & VTEXT)
2034                strcat(sbuf, "|VTEXT");
2035        if (vp->v_flag & VSYSTEM)
2036                strcat(sbuf, "|VSYSTEM");
2037        if (vp->v_flag & VNOFLUSH)
2038                strcat(sbuf, "|VNOFLUSH");
2039        if (vp->v_flag & VBWAIT)
2040                strcat(sbuf, "|VBWAIT");
2041        if (vp->v_flag & VALIASED)
2042                strcat(sbuf, "|VALIASED");
2043        if (sbuf[0] != '\0')
2044                printf(" flags (%s)", &sbuf[1]);
2045}
2046
2047
/*
 * Build the path of a vnode into pathbuf by way of build_path().
 *
 * The value *len holds on entry is handed to build_path() as its third
 * argument (presumably the size of pathbuf - confirm against
 * build_path), and len itself is passed as the fourth so build_path()
 * can store a result through it.  Returns whatever build_path() returns.
 */
int
vn_getpath(struct vnode *vp, char *pathbuf, int *len)
{
	int buflen = *len;

	return build_path(vp, pathbuf, buflen, len);
}
2053
2054
/* Package-extension table: nexts fixed-width slots of max_ext_width bytes. */
static char *extension_table=NULL;
static int   nexts;
static int   max_ext_width;

/*
 * qsort comparison callback: orders two C strings by length only.
 * The result is negative when a is shorter than b, zero when both have
 * the same length and positive when a is longer.
 */
static int
extension_cmp(void *a, void *b)
{
	size_t len_a = strlen((char *)a);
	size_t len_b = strlen((char *)b);

	return (len_a - len_b);
}
2064
2065
2066//
2067// This is the api LaunchServices uses to inform the kernel
2068// the list of package extensions to ignore.
2069//
2070// Internally we keep the list sorted by the length of the
2071// the extension (from longest to shortest).  We sort the
2072// list of extensions so that we can speed up our searches
2073// when comparing file names -- we only compare extensions
2074// that could possibly fit into the file name, not all of
2075// them (i.e. a short 8 character name can't have an 8
2076// character extension).
2077//
2078__private_extern__ int
2079set_package_extensions_table(void *data, int nentries, int maxwidth)
2080{
2081    char *new_exts, *ptr;
2082    int error, i, len;
2083    
2084    if (nentries <= 0 || nentries > 1024 || maxwidth <= 0 || maxwidth > 255) {
2085        return EINVAL;
2086    }
2087
2088    MALLOC(new_exts, char *, nentries * maxwidth, M_TEMP, M_WAITOK);
2089    
2090    error = copyin(CAST_USER_ADDR_T(data), new_exts, nentries * maxwidth);
2091    if (error) {
2092        FREE(new_exts, M_TEMP);
2093        return error;
2094    }
2095
2096    if (extension_table) {
2097        FREE(extension_table, M_TEMP);
2098    }
2099    extension_table = new_exts;
2100    nexts           = nentries;
2101    max_ext_width   = maxwidth;
2102
2103    qsort(extension_table, nexts, maxwidth, extension_cmp);
2104
2105    return 0;
2106}
2107
2108
2109__private_extern__ int
2110is_package_name(char *name, int len)
2111{
2112    int i, extlen;
2113    char *ptr, *name_ext;
2114    
2115    if (len <= 3) {
2116        return 0;
2117    }
2118
2119    name_ext = NULL;
2120    for(ptr=name; *ptr != '\0'; ptr++) {
2121        if (*ptr == '.') {
2122            name_ext = ptr;
2123        }
2124    }
2125
2126    // if there is no "." extension, it can't match
2127    if (name_ext == NULL) {
2128        return 0;
2129    }
2130
2131    // advance over the "."
2132    name_ext++;
2133
2134    // now iterate over all the extensions to see if any match
2135    ptr = &extension_table[0];
2136    for(i=0; i < nexts; i++, ptr+=max_ext_width) {
2137        extlen = strlen(ptr);
2138        if (strncasecmp(name_ext, ptr, extlen) == 0 && name_ext[extlen] == '\0') {
2139            // aha, a match!
2140            return 1;
2141        }
2142    }
2143
2144    // if we get here, no extension matched
2145    return 0;
2146}
2147
2148int
2149vn_path_package_check(__unused vnode_t vp, char *path, int pathlen, int *component)
2150{
2151    char *ptr, *end;
2152    int comp=0;
2153    
2154    *component = -1;
2155    if (*path != '/') {
2156        return EINVAL;
2157    }
2158
2159    end = path + 1;
2160    while(end < path + pathlen && *end != '\0') {
2161        while(end < path + pathlen && *end == '/' && *end != '\0') {
2162            end++;
2163        }
2164
2165        ptr = end;
2166
2167        while(end < path + pathlen && *end != '/' && *end != '\0') {
2168            end++;
2169        }
2170
2171        if (end > path + pathlen) {
2172            // hmm, string wasn't null terminated 
2173            return EINVAL;
2174        }
2175
2176        *end = '\0';
2177        if (is_package_name(ptr, end - ptr)) {
2178            *component = comp;
2179            break;
2180        }
2181
2182        end++;
2183        comp++;
2184    }
2185
2186    return 0;
2187}
2188
2189
2190/*
2191 * Top level filesystem related information gathering.
2192 */
2193extern unsigned int vfs_nummntops;
2194
2195int
2196vfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
2197           user_addr_t newp, size_t newlen, struct proc *p)
2198{
2199        struct vfstable *vfsp;
2200        int *username;
2201        u_int usernamelen;
2202        int error;
2203        struct vfsconf *vfsc;
2204
2205        /*
2206         * The VFS_NUMMNTOPS shouldn't be at name[0] since
2207         * is a VFS generic variable. So now we must check
2208         * namelen so we don't end up covering any UFS
2209         * variables (sinc UFS vfc_typenum is 1).
2210         *
2211         * It should have been:
2212         *    name[0]:  VFS_GENERIC
2213         *    name[1]:  VFS_NUMMNTOPS
2214         */
2215        if (namelen == 1 && name[0] == VFS_NUMMNTOPS) {
2216                return (sysctl_rdint(oldp, oldlenp, newp, vfs_nummntops));
2217        }
2218
2219        /* all sysctl names at this level are at least name and field */
2220        if (namelen < 2)
2221                return (EISDIR);                /* overloaded */
2222        if (name[0] != VFS_GENERIC) {
2223                struct vfs_context context;
2224
2225                for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2226                        if (vfsp->vfc_typenum == name[0])
2227                                break;
2228                if (vfsp == NULL)
2229                        return (ENOTSUP);
2230                context.vc_proc = p;
2231                context.vc_ucred = kauth_cred_get();
2232
2233                return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
2234                            oldp, oldlenp, newp, newlen, &context));
2235        }
2236        switch (name[1]) {
2237        case VFS_MAXTYPENUM:
2238                return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
2239        case VFS_CONF:
2240                if (namelen < 3)
2241                        return (ENOTDIR);       /* overloaded */
2242                for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
2243                        if (vfsp->vfc_typenum == name[2])
2244                                break;
2245                if (vfsp == NULL)
2246                        return (ENOTSUP);
2247                vfsc = (struct vfsconf *)vfsp;
2248                if (proc_is64bit(p)) {
2249                    struct user_vfsconf  usr_vfsc;
2250                    usr_vfsc.vfc_vfsops = CAST_USER_ADDR_T(vfsc->vfc_vfsops);
2251                bcopy(vfsc->vfc_name, usr_vfsc.vfc_name, sizeof(usr_vfsc.vfc_name));
2252                    usr_vfsc.vfc_typenum = vfsc->vfc_typenum;
2253                    usr_vfsc.vfc_refcount = vfsc->vfc_refcount;
2254                    usr_vfsc.vfc_flags = vfsc->vfc_flags;
2255                    usr_vfsc.vfc_mountroot = CAST_USER_ADDR_T(vfsc->vfc_mountroot);
2256                    usr_vfsc.vfc_next = CAST_USER_ADDR_T(vfsc->vfc_next);
2257            return (sysctl_rdstruct(oldp, oldlenp, newp, &usr_vfsc,
2258                                    sizeof(usr_vfsc)));
2259                }
2260                else {
2261            return (sysctl_rdstruct(oldp, oldlenp, newp, vfsc,
2262                                    sizeof(struct vfsconf)));
2263                }
2264                
2265        case VFS_SET_PACKAGE_EXTS:
2266                return set_package_extensions_table((void *)name[1], name[2], name[3]);
2267        }
2268        /*
2269         * We need to get back into the general MIB, so we need to re-prepend
2270         * CTL_VFS to our name and try userland_sysctl().
2271         */
2272        usernamelen = namelen + 1;
2273        MALLOC(username, int *, usernamelen * sizeof(*username),
2274            M_TEMP, M_WAITOK);
2275        bcopy(name, username + 1, namelen * sizeof(*name));
2276        username[0] = CTL_VFS;
2277        error = userland_sysctl(p, username, usernamelen, oldp, 
2278                                oldlenp, 1, newp, newlen, oldlenp);
2279        FREE(username, M_TEMP);
2280        return (error);
2281}
2282
2283int kinfo_vdebug = 1;
2284#define KINFO_VNODESLOP 10
2285/*
2286 * Dump vnode list (via sysctl).
2287 * Copyout address of vnode followed by vnode.
2288 */
2289/* ARGSUSED */
2290int
2291sysctl_vnode(__unused user_addr_t where, __unused size_t *sizep)
2292{
2293#if 0
2294        struct mount *mp, *nmp;
2295        struct vnode *nvp, *vp;
2296        char *bp = where, *savebp;
2297        char *ewhere;
2298        int error;
2299
2300#define VPTRSZ  sizeof (struct vnode *)
2301#define VNODESZ sizeof (struct vnode)
2302        if (where == NULL) {
2303                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
2304                return (0);
2305        }
2306        ewhere = where + *sizep;
2307                
2308        for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
2309                if (vfs_busy(mp, LK_NOWAIT)) {
2310                        nmp = mp->mnt_list.cqe_next;
2311                        continue;
2312                }
2313                savebp = bp;
2314again:
2315                TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
2316                        /*
2317                         * Check that the vp is still associated with
2318                         * this filesystem.  RACE: could have been
2319                         * recycled onto the same filesystem.
2320                         */
2321                        if (vp->v_mount != mp) {
2322                                if (kinfo_vdebug)
2323                                        printf("kinfo: vp changed\n");
2324                                bp = savebp;
2325                                goto again;
2326                        }
2327                        if (bp + VPTRSZ + VNODESZ > ewhere) {
2328                                vfs_unbusy(mp);
2329                                *sizep = bp - where;
2330                                return (ENOMEM);
2331                        }
2332                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
2333                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
2334                                vfs_unbusy(mp);
2335                                return (error);
2336                        }
2337                        bp += VPTRSZ + VNODESZ;
2338                }
2339                nmp = mp->mnt_list.cqe_next;
2340                vfs_unbusy(mp);
2341        }
2342
2343        *sizep = bp - where;
2344        return (0);
2345#else
2346        return(EINVAL);
2347#endif
2348}
2349
2350/*
2351 * Check to see if a filesystem is mounted on a block device.
2352 */
2353int
2354vfs_mountedon(vp)
2355        struct vnode *vp;
2356{
2357        struct vnode *vq;
2358        int error = 0;
2359
2360        SPECHASH_LOCK();
2361        if (vp->v_specflags & SI_MOUNTEDON) {
2362                error = EBUSY;
2363                goto out;
2364        }
2365        if (vp->v_flag & VALIASED) {
2366                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
2367                        if (vq->v_rdev != vp->v_rdev ||
2368                            vq->v_type != vp->v_type)
2369                                continue;
2370                        if (vq->v_specflags & SI_MOUNTEDON) {
2371                                error = EBUSY;
2372                                break;
2373                        }
2374                }
2375        }
2376out:
2377        SPECHASH_UNLOCK();
2378        return (error);
2379}
2380
2381/*
2382 * Unmount all filesystems. The list is traversed in reverse order
2383 * of mounting to avoid dependencies.
2384 */
2385__private_extern__ void
2386vfs_unmountall()
2387{
2388        struct mount *mp;
2389        struct proc *p = current_proc();
2390        int error;
2391
2392        /*
2393         * Since this only runs when rebooting, it is not interlocked.
2394         */
2395        mount_list_lock();
2396        while(!TAILQ_EMPTY(&mountlist)) {
2397                mp = TAILQ_LAST(&mountlist, mntlist);
2398                mount_list_unlock();
2399                error = dounmount(mp, MNT_FORCE, p);
2400                if (error) {
2401                        mount_list_lock();
2402                        TAILQ_REMOVE(&mountlist, mp, mnt_list);
2403                        printf("unmount of %s failed (", mp->mnt_vfsstat.f_mntonname);
2404                        if (error == EBUSY)
2405                                printf("BUSY)\n");
2406                        else
2407                                printf("%d)\n", error);
2408                        continue;
2409                }
2410                mount_list_lock();
2411        }
2412        mount_list_unlock();
2413}
2414
2415
2416/*  
2417 * This routine is called from vnode_pager_no_senders()
2418 * which in turn can be called with vnode locked by vnode_uncache()
2419 * But it could also get called as a result of vm_object_cache_trim().
2420 * In that case lock state is unknown.
2421 * AGE the vnode so that it gets recycled quickly.
2422 */
2423__private_extern__ void
2424vnode_pager_vrele(struct vnode *vp)
2425{
2426        vnode_lock(vp);
2427
2428        if (!ISSET(vp->v_lflag, VL_TERMINATE))
2429                panic("vnode_pager_vrele: vp not in termination");
2430        vp->v_lflag &= ~VNAMED_UBC;
2431
2432        if (UBCINFOEXISTS(vp)) {
2433                struct ubc_info *uip = vp->v_ubcinfo;
2434
2435                if (ISSET(uip->ui_flags, UI_WASMAPPED))
2436                        SET(vp->v_flag, VWASMAPPED);
2437                vp->v_ubcinfo = UBC_INFO_NULL;
2438
2439                ubc_info_deallocate(uip);
2440        } else {
2441                panic("NO ubcinfo in vnode_pager_vrele");
2442        }
2443        vnode_unlock(vp);
2444
2445        wakeup(&vp->v_lflag);
2446}
2447
2448
2449#include <sys/disk.h>
2450
2451errno_t
2452vfs_init_io_attributes(vnode_t devvp, mount_t mp)
2453{
2454        int     error;
2455        off_t   readblockcnt;
2456        off_t   writeblockcnt;
2457        off_t   readmaxcnt;
2458        off_t   writemaxcnt;
2459        off_t   readsegcnt;
2460        off_t   writesegcnt;
2461        off_t   readsegsize;
2462        off_t   writesegsize;
2463        u_long  blksize;
2464        u_int64_t temp;
2465        struct vfs_context context;
2466
2467        proc_t  p = current_proc();
2468
2469        context.vc_proc = p;
2470        context.vc_ucred = kauth_cred_get();
2471
2472        int isvirtual = 0;
2473        /*
2474         * determine if this mount point exists on the same device as the root
2475         * partition... if so, then it comes under the hard throttle control
2476         */
2477        int        thisunit = -1;
2478        static int rootunit = -1;
2479
2480        if (rootunit == -1) {
2481                if (VNOP_IOCTL(rootvp, DKIOCGETBSDUNIT, (caddr_t)&rootunit, 0, &context))
2482                        rootunit = -1; 
2483                else if (rootvp == devvp)
2484                        mp->mnt_kern_flag |= MNTK_ROOTDEV;
2485        }
2486        if (devvp != rootvp && rootunit != -1) {
2487                if (VNOP_IOCTL(devvp, DKIOCGETBSDUNIT, (caddr_t)&thisunit, 0, &context) == 0) {
2488                        if (thisunit == rootunit)
2489                                mp->mnt_kern_flag |= MNTK_ROOTDEV;
2490                }
2491        }
2492        /*
2493         * force the spec device to re-cache
2494         * the underlying block size in case
2495         * the filesystem overrode the initial value
2496         */
2497        set_fsblocksize(devvp);
2498
2499
2500        if ((error = VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE,
2501                                (caddr_t)&blksize, 0, &context)))
2502                return (error);
2503
2504        mp->mnt_devblocksize = blksize;
2505
2506        if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, &context) == 0) {
2507                if (isvirtual)
2508                        mp->mnt_kern_flag |= MNTK_VIRTUALDEV;
2509        }
2510
2511        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTREAD,
2512                                (caddr_t)&readblockcnt, 0, &context)))
2513                return (error);
2514
2515        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBLOCKCOUNTWRITE,
2516                                (caddr_t)&writeblockcnt, 0, &context)))
2517                return (error);
2518
2519        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTREAD,
2520                                (caddr_t)&readmaxcnt, 0, &context)))
2521                return (error);
2522
2523        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXBYTECOUNTWRITE,
2524                                (caddr_t)&writemaxcnt, 0, &context)))
2525                return (error);
2526
2527        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTREAD,
2528                                (caddr_t)&readsegcnt, 0, &context)))
2529                return (error);
2530
2531        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTCOUNTWRITE,
2532                                (caddr_t)&writesegcnt, 0, &context)))
2533                return (error);
2534
2535        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTREAD,
2536                                (caddr_t)&readsegsize, 0, &context)))
2537                return (error);
2538
2539        if ((error = VNOP_IOCTL(devvp, DKIOCGETMAXSEGMENTBYTECOUNTWRITE,
2540                                (caddr_t)&writesegsize, 0, &context)))
2541                return (error);
2542
2543        if (readmaxcnt)
2544                temp = (readmaxcnt > UINT32_MAX) ? UINT32_MAX : readmaxcnt;
2545        else {
2546                if (readblockcnt) {
2547                        temp = readblockcnt * blksize;
2548                        temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
2549                } else
2550                        temp = MAXPHYS;
2551        }
2552        mp->mnt_maxreadcnt = (u_int32_t)temp;
2553
2554        if (writemaxcnt)
2555                temp = (writemaxcnt > UINT32_MAX) ? UINT32_MAX : writemaxcnt;
2556        else {
2557                if (writeblockcnt) {
2558                        temp = writeblockcnt * blksize;
2559                        temp = (temp > UINT32_MAX) ? UINT32_MAX : temp;
2560                } else
2561                        temp = MAXPHYS;
2562        }
2563        mp->mnt_maxwritecnt = (u_int32_t)temp;
2564
2565        if (readsegcnt) {
2566                temp = (readsegcnt > UINT16_MAX) ? UINT16_MAX : readsegcnt;
2567                mp->mnt_segreadcnt = (u_int16_t)temp;
2568        }
2569        if (writesegcnt) {
2570                temp = (writesegcnt > UINT16_MAX) ? UINT16_MAX : writesegcnt;
2571                mp->mnt_segwritecnt = (u_int16_t)temp;
2572        }
2573        if (readsegsize)
2574                temp = (readsegsize > UINT32_MAX) ? UINT32_MAX : readsegsize;
2575        else
2576                temp = mp->mnt_maxreadcnt;
2577        mp->mnt_maxsegreadsize = (u_int32_t)temp;
2578
2579        if (writesegsize)
2580                temp = (writesegsize > UINT32_MAX) ? UINT32_MAX : writesegsize;
2581        else
2582                temp = mp->mnt_maxwritecnt;
2583        mp->mnt_maxsegwritesize = (u_int32_t)temp;
2584
2585        return (error);
2586}
2587
2588static struct klist fs_klist;
2589
2590void
2591vfs_event_init(void)
2592{
2593
2594        klist_init(&fs_klist);
2595}
2596
2597void
2598vfs_event_signal(__unused fsid_t *fsid, u_int32_t event, __unused intptr_t data)
2599{
2600
2601        KNOTE(&fs_klist, event);
2602}
2603
/*
 * Return the number of mounted filesystems, as reported by
 * mount_getvfscnt().
 */
static int
sysctl_vfs_getvfscnt(void)
{
	int nmounts = mount_getvfscnt();

	return(nmounts);
}
2612
2613
2614static int
2615mount_getvfscnt(void)
2616{
2617        int ret;
2618
2619        mount_list_lock();
2620        ret = nummounts;
2621        mount_list_unlock();
2622        return (ret);
2623
2624}
2625
2626
2627
2628static int
2629mount_fillfsids(fsid_t *fsidlst, int count)
2630{
2631        struct mount *mp;
2632        int actual=0;
2633
2634        actual = 0;
2635        mount_list_lock();
2636        TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2637                if (actual <= count) {
2638                        fsidlst[actual] = mp->mnt_vfsstat.f_fsid;
2639                        actual++;
2640                }
2641        }
2642        mount_list_unlock();
2643        return (actual);
2644
2645}
2646
2647/*
2648 * fill in the array of fsid_t's up to a max of 'count', the actual
2649 * number filled in will be set in '*actual'.  If there are more fsid_t's
2650 * than room in fsidlst then ENOMEM will be returned and '*actual' will
2651 * have the actual count.
2652 * having *actual filled out even in the error case is depended upon.
2653 */
2654static int
2655sysctl_vfs_getvfslist(fsid_t *fsidlst, int count, int *actual)
2656{
2657        struct mount *mp;
2658
2659        *actual = 0;
2660        mount_list_lock();
2661        TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2662                (*actual)++;
2663                if (*actual <= count)
2664                        fsidlst[(*actual) - 1] = mp->mnt_vfsstat.f_fsid;
2665        }
2666        mount_list_unlock();
2667        return (*actual <= count ? 0 : ENOMEM);
2668}
2669
2670static int
2671sysctl_vfs_vfslist SYSCTL_HANDLER_ARGS
2672{
2673        int actual, error;
2674        size_t space;
2675        fsid_t *fsidlst;
2676
2677        /* This is a readonly node. */
2678        if (req->newptr != USER_ADDR_NULL)
2679                return (EPERM);
2680
2681        /* they are querying us so just return the space required. */
2682        if (req->oldptr == USER_ADDR_NULL) {
2683                req->oldidx = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
2684                return 0;
2685        }
2686again:
2687        /*
2688         * Retrieve an accurate count of the amount of space required to copy
2689         * out all the fsids in the system.
2690         */
2691        space = req->oldlen;
2692        req->oldlen = sysctl_vfs_getvfscnt() * sizeof(fsid_t);
2693
2694        /* they didn't give us enough space. */
2695        if (space < req->oldlen)
2696                return (ENOMEM);
2697
2698        MALLOC(fsidlst, fsid_t *, req->oldlen, M_TEMP, M_WAITOK);
2699        error = sysctl_vfs_getvfslist(fsidlst, req->oldlen / sizeof(fsid_t),
2700            &actual);
2701        /*
2702         * If we get back ENOMEM, then another mount has been added while we
2703         * slept in malloc above.  If this is the case then try again.
2704         */
2705        if (error == ENOMEM) {
2706                FREE(fsidlst, M_TEMP);
2707                req->oldlen = space;
2708                goto again;
2709        }
2710        if (error == 0) {
2711                error = SYSCTL_OUT(req, fsidlst, actual * sizeof(fsid_t));
2712        }
2713        FREE(fsidlst, M_TEMP);
2714        return (error);
2715}
2716
2717/*
2718 * Do a sysctl by fsid.
2719 */
2720static int
2721sysctl_vfs_ctlbyfsid SYSCTL_HANDLER_ARGS
2722{
2723        struct vfsidctl vc;
2724        struct user_vfsidctl user_vc;
2725        struct mount *mp;
2726        struct vfsstatfs *sp;
2727        struct proc *p;
2728        int *name;
2729        int error, flags, namelen;
2730        struct vfs_context context;
2731        boolean_t is_64_bit;
2732
2733        name = arg1;
2734        namelen = arg2;
2735        p = req->p;
2736        context.vc_proc = p;
2737        context.vc_ucred = kauth_cred_get();
2738        is_64_bit = proc_is64bit(p);
2739
2740        if (is_64_bit) {
2741                error = SYSCTL_IN(req, &user_vc, sizeof(user_vc));
2742                if (error)
2743                        return (error);
2744                if (user_vc.vc_vers != VFS_CTL_VERS1)
2745                        return (EINVAL);
2746                mp = mount_list_lookupby_fsid(&user_vc.vc_fsid, 0, 0);
2747        } 
2748        else {
2749                error = SYSCTL_IN(req, &vc, sizeof(vc));
2750                if (error)
2751                        return (error);
2752                if (vc.vc_vers != VFS_CTL_VERS1)
2753                        return (EINVAL);
2754                mp = mount_list_lookupby_fsid(&vc.vc_fsid, 0, 0);
2755        }
2756        if (mp == NULL)
2757                return (ENOENT);
2758        /* reset so that the fs specific code can fetch it. */
2759        req->newidx = 0;
2760        /*
2761         * Note if this is a VFS_CTL then we pass the actual sysctl req
2762         * in for "oldp" so that the lower layer can DTRT and use the
2763         * SYSCTL_IN/OUT routines.
2764         */
2765        if (mp->mnt_op->vfs_sysctl != NULL) {
2766                if (is_64_bit) {
2767                        if (vfs_64bitready(mp)) {
2768                                error = mp->mnt_op->vfs_sysctl(name, namelen,
2769                                    CAST_USER_ADDR_T(req),
2770                                    NULL, USER_ADDR_NULL, 0, 
2771                                    &context);
2772                        }
2773                        else {
2774                                error = ENOTSUP;
2775                        }
2776                }
2777                else {
2778                        error = mp->mnt_op->vfs_sysctl(name, namelen,
2779                            CAST_USER_ADDR_T(req),
2780                            NULL, USER_ADDR_NULL, 0, 
2781                            &context);
2782                }
2783                if (error != ENOTSUP)
2784                        return (error);
2785        }
2786        switch (name[0]) {
2787        case VFS_CTL_UMOUNT:
2788                req->newidx = 0;
2789                if (is_64_bit) {
2790                        req->newptr = user_vc.vc_ptr;
2791                        req->newlen = (size_t)user_vc.vc_len;
2792                }
2793                else {
2794                        req->newptr = CAST_USER_ADDR_T(vc.vc_ptr);
2795                        req->newlen = vc.vc_len;
2796                }
2797                error = SYSCTL_IN(req, &flags, sizeof(flags));
2798                if (error)
2799                        break;
2800                error = safedounmount(mp, flags, p);
2801                break;
2802        case VFS_CTL_STATFS:
2803                req->newidx = 0;
2804                if (is_64_bit) {
2805                        req->newptr = user_vc.vc_ptr;
2806                        req->newlen = (size_t)user_vc.vc_len;
2807                }
2808                else {
2809                        req->newptr = CAST_USER_ADDR_T(vc.vc_ptr);
2810                        req->newlen = vc.vc_len;
2811                }
2812                error = SYSCTL_IN(req, &flags, sizeof(flags));
2813                if (error)
2814                        break;
2815                sp = &mp->mnt_vfsstat;
2816                if (((flags & MNT_NOWAIT) == 0 || (flags & MNT_WAIT)) &&
2817                    (error = vfs_update_vfsstat(mp, &context)))
2818                        return (error);
2819                if (is_64_bit) {
2820                        struct user_statfs sfs;
2821                        bzero(&sfs, sizeof(sfs));
2822                        sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2823                        sfs.f_type = mp->mnt_vtable->vfc_typenum;
2824                        sfs.f_bsize = (user_long_t)sp->f_bsize;
2825                        sfs.f_iosize = (user_long_t)sp->f_iosize;
2826                        sfs.f_blocks = (user_long_t)sp->f_blocks;
2827                        sfs.f_bfree = (user_long_t)sp->f_bfree;
2828                        sfs.f_bavail = (user_long_t)sp->f_bavail;
2829                        sfs.f_files = (user_long_t)sp->f_files;
2830                        sfs.f_ffree = (user_long_t)sp->f_ffree;
2831                        sfs.f_fsid = sp->f_fsid;
2832                        sfs.f_owner = sp->f_owner;
2833    
2834                        strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1);
2835                        strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1);
2836                        strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1);
2837            
2838                        error = SYSCTL_OUT(req, &sfs, sizeof(sfs));
2839                }
2840                else {
2841                        struct statfs sfs;
2842                        bzero(&sfs, sizeof(struct statfs));
2843                        sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
2844                        sfs.f_type = mp->mnt_vtable->vfc_typenum;
2845
2846                        /*
2847                         * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
2848                         * have to fudge the numbers here in that case.   We inflate the blocksize in order
2849                         * to reflect the filesystem size as best we can.
2850                         */
2851                        if (sp->f_blocks > LONG_MAX) {
2852                                int             shift;
2853
2854                                /*
2855                                 * Work out how far we have to shift the block count down to make it fit.
2856                                 * Note that it's possible to have to shift so far that the resulting
2857                                 * blocksize would be unreportably large.  At that point, we will clip
2858                                 * any values that don't fit.
2859                                 *
2860                                 * For safety's sake, we also ensure that f_iosize is never reported as
2861                                 * being smaller than f_bsize.
2862                                 */
2863                                for (shift = 0; shift < 32; shift++) {
2864                                        if ((sp->f_blocks >> shift) <= LONG_MAX)
2865                                                break;
2866                                        if ((sp->f_bsize << (shift + 1)) > LONG_MAX)
2867                                                break;
2868                                }
2869#define __SHIFT_OR_CLIP(x, s)   ((((x) >> (s)) > LONG_MAX) ? LONG_MAX : ((x) >> (s)))
2870                                sfs.f_blocks = (long)__SHIFT_OR_CLIP(sp->f_blocks, shift);
2871                                sfs.f_bfree = (long)__SHIFT_OR_CLIP(sp->f_bfree, shift);
2872                                sfs.f_bavail = (long)__SHIFT_OR_CLIP(sp->f_bavail, shift);
2873#undef __SHIFT_OR_CLIP
2874                                sfs.f_bsize = (long)(sp->f_bsize << shift);
2875                                sfs.f_iosize = lmax(sp->f_iosize, sp->f_bsize);
2876                        } else {
2877                                sfs.f_bsize = (long)sp->f_bsize;
2878                                sfs.f_iosize = (long)sp->f_iosize;
2879                                sfs.f_blocks = (long)sp->f_blocks;
2880                                sfs.f_bfree = (long)sp->f_bfree;
2881                                sfs.f_bavail = (long)sp->f_bavail;
2882                        }
2883                        sfs.f_files = (long)sp->f_files;
2884                        sfs.f_ffree = (long)sp->f_ffree;
2885                        sfs.f_fsid = sp->f_fsid;
2886                        sfs.f_owner = sp->f_owner;
2887    
2888                        strncpy(&sfs.f_fstypename, &sp->f_fstypename, MFSNAMELEN-1);
2889                        strncpy(&sfs.f_mntonname, &sp->f_mntonname, MNAMELEN-1);
2890                        strncpy(&sfs.f_mntfromname, &sp->f_mntfromname, MNAMELEN-1);
2891            
2892                        error = SYSCTL_OUT(req, &sfs, sizeof(sfs));
2893                }
2894                break;
2895        default:
2896                return (ENOTSUP);
2897        }
2898        return (error);
2899}
2900
2901static int      filt_fsattach(struct knote *kn);
2902static void     filt_fsdetach(struct knote *kn);
2903static int      filt_fsevent(struct knote *kn, long hint);
2904
2905struct filterops fs_filtops =
2906        { 0, filt_fsattach, filt_fsdetach, filt_fsevent };
2907
2908static int
2909filt_fsattach(struct knote *kn)
2910{
2911
2912        kn->kn_flags |= EV_CLEAR;
2913        KNOTE_ATTACH(&fs_klist, kn);
2914        return (0);
2915}
2916
2917static void
2918filt_fsdetach(struct knote *kn)
2919{
2920
2921        KNOTE_DETACH(&fs_klist, kn);
2922}
2923
2924static int
2925filt_fsevent(struct knote *kn, long hint)
2926{
2927
2928        kn->kn_fflags |= hint;
2929        return (kn->kn_fflags != 0);
2930}
2931
2932static int
2933sysctl_vfs_noremotehang SYSCTL_HANDLER_ARGS
2934{
2935        int out, error;
2936        pid_t pid;
2937        size_t space;
2938        struct proc *p;
2939
2940        /* We need a pid. */
2941        if (req->newptr == USER_ADDR_NULL)
2942                return (EINVAL);
2943
2944        error = SYSCTL_IN(req, &pid, sizeof(pid));
2945        if (error)
2946                return (error);
2947
2948        p = pfind(pid < 0 ? -pid : pid);
2949        if (p == NULL)
2950                return (ESRCH);
2951
2952        /*
2953         * Fetching the value is ok, but we only fetch if the old
2954         * pointer is given.
2955         */
2956        if (req->oldptr != USER_ADDR_NULL) {
2957                out = !((p->p_flag & P_NOREMOTEHANG) == 0);
2958                error = SYSCTL_OUT(req, &out, sizeof(out));
2959                return (error);
2960        }
2961
2962        /* XXX req->p->p_ucred -> kauth_cred_get() ??? */
2963        /* cansignal offers us enough security. */
2964        if (p != req->p && suser(req->p->p_ucred, &req->p->p_acflag) != 0)
2965                return (EPERM);
2966
2967        if (pid < 0)
2968                p->p_flag &= ~P_NOREMOTEHANG;
2969        else
2970                p->p_flag |= P_NOREMOTEHANG;
2971
2972        return (0);
2973}
2974/* the vfs.generic. branch. */
2975SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RW, 0, "vfs generic hinge");
2976/* retreive a list of mounted filesystem fsid_t */
2977SYSCTL_PROC(_vfs_generic, OID_AUTO, vfsidlist, CTLFLAG_RD,
2978    0, 0, sysctl_vfs_vfslist, "S,fsid", "List of mounted filesystem ids");
2979/* perform operations on filesystem via fsid_t */
2980SYSCTL_NODE(_vfs_generic, OID_AUTO, ctlbyfsid, CTLFLAG_RW,
2981    sysctl_vfs_ctlbyfsid, "ctlbyfsid");
2982SYSCTL_PROC(_vfs_generic, OID_AUTO, noremotehang, CTLFLAG_RW,
2983    0, 0, sysctl_vfs_noremotehang, "I", "noremotehang");
2984        
2985        
2986int num_reusedvnodes=0;
2987
2988static int
2989new_vnode(vnode_t *vpp)
2990{
2991        vnode_t vp;
2992        int retries = 0;                                /* retry incase of tablefull */
2993        int vpid;
2994        struct timespec ts;
2995
2996retry:
2997        vnode_list_lock();
2998
2999        if ( !TAILQ_EMPTY(&vnode_free_list)) {
3000                /*
3001                 * Pick the first vp for possible reuse
3002                 */
3003                vp = TAILQ_FIRST(&vnode_free_list);
3004
3005                if (vp->v_lflag & VL_DEAD)
3006                        goto steal_this_vp;
3007        } else
3008                vp = NULL;
3009
3010        /*
3011         * we're either empty, or the next guy on the
3012         * list is a valid vnode... if we're under the
3013         * limit, we'll create a new vnode
3014         */
3015        if (numvnodes < desiredvnodes) {
3016                numvnodes++;
3017                vnode_list_unlock();
3018                MALLOC_ZONE(vp, struct vnode *, sizeof *vp, M_VNODE, M_WAITOK);
3019                bzero((char *)vp, sizeof *vp);
3020                VLISTNONE(vp);          /* avoid double queue removal */
3021                lck_mtx_init(&vp->v_lock, vnode_lck_grp, vnode_lck_attr);
3022
3023                nanouptime(&ts);
3024                vp->v_id = ts.tv_nsec;
3025                vp->v_flag = VSTANDARD;
3026
3027                goto done;
3028        }
3029        if (vp == NULL) {
3030                /*
3031                 * we've reached the system imposed maximum number of vnodes
3032                 * but there isn't a single one available
3033                 * wait a bit and then retry... if we can't get a vnode
3034                 * after 100 retries, than log a complaint
3035                 */
3036                if (++retries <= 100) {
3037                        vnode_list_unlock();
3038                        IOSleep(1);
3039                        goto retry;
3040                }
3041                        
3042                vnode_list_unlock();
3043                tablefull("vnode");
3044                log(LOG_EMERG, "%d desired, %d numvnodes, "
3045                        "%d free, %d inactive\n",
3046                        desiredvnodes, numvnodes, freevnodes, inactivevnodes);
3047                *vpp = 0;
3048                return (ENFILE);
3049        }
3050steal_this_vp:
3051        vpid = vp->v_id;
3052
3053        VREMFREE("new_vnode", vp);
3054        VLISTNONE(vp);
3055
3056        vnode_list_unlock();
3057        vnode_lock(vp);
3058
3059        /* 
3060         * We could wait for the vnode_lock after removing the vp from the freelist
3061         * and the vid is bumped only at the very end of reclaim. So it is  possible
3062         * that we are looking at a vnode that is being terminated. If so skip it.
3063         */ 
3064        if ((vpid != vp->v_id) || (vp->v_usecount != 0) || (vp->v_iocount != 0) || 
3065                        VONLIST(vp) || (vp->v_lflag & VL_TERMINATE)) {
3066                /*
3067                 * we lost the race between dropping the list lock
3068                 * and picking up the vnode_lock... someone else
3069                 * used this vnode and it is now in a new state
3070                 * so we need to go back and try again
3071                 */
3072                vnode_unlock(vp);
3073                goto retry;
3074        }
3075        if ( (vp->v_lflag & (VL_NEEDINACTIVE | VL_MARKTERM)) == VL_NEEDINACTIVE ) {
3076                /*
3077                 * we did a vnode_rele_ext that asked for
3078                 * us not to reenter the filesystem during
3079                 * the release even though VL_NEEDINACTIVE was
3080                 * set... we'll do it here by doing a
3081                 * vnode_get/vnode_put
3082                 *
3083                 * pick up an iocount so that we can call
3084                 * vnode_put and drive the VNOP_INACTIVE...
3085                 * vnode_put will either leave us off 
3086                 * the freelist if a new ref comes in,
3087                 * or put us back on the end of the freelist
3088                 * or recycle us if we were marked for termination...
3089                 * so we'll just go grab a new candidate
3090                 */
3091                vp->v_iocount++;
3092#ifdef JOE_DEBUG
3093                record_vp(vp, 1);
3094#endif
3095                vnode_put_locked(vp);
3096                vnode_unlock(vp);
3097                goto retry;
3098        }
3099        OSAddAtomic(1, &num_reusedvnodes);
3100
3101        /* Checks for anyone racing us for recycle */ 
3102        if (vp->v_type != VBAD) {
3103                if (vp->v_lflag & VL_DEAD)
3104                        panic("new_vnode: the vnode is VL_DEAD but not VBAD");
3105
3106                (void)vnode_reclaim_internal(vp, 1, 1);
3107
3108                if ((VONLIST(vp)))
3109                        panic("new_vnode: vp on list ");
3110                if (vp->v_usecount || vp->v_iocount || vp->v_kusecount ||
3111                    (vp->v_lflag & (VNAMED_UBC | VNAMED_MOUNT | VNAMED_FSHASH)))
3112                        panic("new_vnode: free vnode still referenced\n");
3113                if ((vp->v_mntvnodes.tqe_prev != 0) && (vp->v_mntvnodes.tqe_next != 0))
3114                        panic("new_vnode: vnode seems to be on mount list ");
3115                if ( !LIST_EMPTY(&vp->v_nclinks) || !LIST_EMPTY(&vp->v_ncchildren))
3116                        panic("new_vnode: vnode still hooked into the name cache");
3117        }
3118        if (vp->v_unsafefs) {
3119                lck_mtx_destroy(&vp->v_unsafefs->fsnodelock, vnode_lck_grp);
3120                FREE_ZONE((void *)vp->v_unsafefs, sizeof(struct unsafe_fsnode), M_UNSAFEFS);
3121                vp->v_unsafefs = (struct unsafe_fsnode *)NULL;
3122        }
3123        vp->v_lflag = 0;
3124        vp->v_writecount = 0;
3125        vp->v_references = 0;
3126        vp->v_iterblkflags = 0;
3127        vp->v_flag = VSTANDARD;
3128        /* vbad vnodes can point to dead_mountp */
3129        vp->v_mount = 0;
3130        vp->v_defer_reclaimlist = (vnode_t)0;
3131
3132        vnode_unlock(vp);
3133done:
3134        *vpp = vp;
3135
3136        return (0);
3137}
3138
3139void
3140vnode_lock(vnode_t vp)
3141{
3142        lck_mtx_lock(&vp->v_lock);
3143}
3144
3145void
3146vnode_unlock(vnode_t vp)
3147{
3148        lck_mtx_unlock(&vp->v_lock);
3149}
3150
3151
3152
3153int
3154vnode_get(struct vnode *vp)
3155{
3156        vnode_lock(vp);
3157
3158        if ( (vp->v_iocount == 0) && (vp->v_lflag & (VL_TERMINATE | VL_DEAD)) ) {
3159                vnode_unlock(vp);
3160                return(ENOENT); 
3161        }
3162        vp->v_iocount++;
3163#ifdef JOE_DEBUG
3164        record_vp(vp, 1);
3165#endif
3166        vnode_unlock(vp);
3167
3168        return(0);      
3169}
3170
3171int
3172vnode_getwithvid(vnode_t vp, int vid)
3173{
3174        return(vget_internal(vp, vid, ( VNODE_NODEAD| VNODE_WITHID)));
3175}
3176
3177int
3178vnode_getwithref(vnode_t vp)
3179{
3180        return(vget_internal(vp, 0, 0));
3181}
3182
3183
3184int
3185vnode_put(vnode_t vp)
3186{
3187        int retval;
3188
3189        vnode_lock(vp);
3190        retval = vnode_put_locked(vp);
3191        vnode_unlock(vp);
3192
3193        return(retval);
3194}
3195
3196int
3197vnode_put_locked(vnode_t vp)
3198{
3199        struct vfs_context context;
3200
3201retry:
3202        if (vp->v_iocount < 1) 
3203                panic("vnode_put(%x): iocount < 1", vp);
3204
3205        if ((vp->v_usecount > 0) || (vp->v_iocount > 1))  {
3206                vnode_dropiocount(vp, 1);
3207                return(0);
3208        }
3209        if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD | VL_NEEDINACTIVE)) == VL_NEEDINACTIVE) {
3210
3211                vp->v_lflag &= ~VL_NEEDINACTIVE;
3212                vnode_unlock(vp);
3213
3214                context.vc_proc = current_proc();
3215                context.vc_ucred = kauth_cred_get();
3216                VNOP_INACTIVE(vp, &context);
3217
3218                vnode_lock(vp);
3219                /*
3220                 * because we had to drop the vnode lock before calling
3221                 * VNOP_INACTIVE, the state of this vnode may have changed...
3222                 * we may pick up both VL_MARTERM and either
3223                 * an iocount or a usecount while in the VNOP_INACTIVE call
3224                 * we don't want to call vnode_reclaim_internal on a vnode
3225                 * that has active references on it... so loop back around
3226                 * and reevaluate the state
3227                 */
3228                goto retry;
3229        }
3230        vp->v_lflag &= ~VL_NEEDINACTIVE;
3231
3232        if ((vp->v_lflag & (VL_MARKTERM | VL_TERMINATE | VL_DEAD)) == VL_MARKTERM)
3233                vnode_reclaim_internal(vp, 1, 0);
3234
3235        vnode_dropiocount(vp, 1);
3236        vnode_list_add(vp);
3237
3238        return(0);
3239}
3240
3241/* is vnode_t in use by others?  */
3242int 
3243vnode_isinuse(vnode_t vp, int refcnt)
3244{
3245        return(vnode_isinuse_locked(vp, refcnt, 0));
3246}
3247
3248
3249static int 
3250vnode_isinuse_locked(vnode_t vp, int refcnt, int locked)
3251{
3252        int retval = 0;
3253
3254        if (!locked)
3255                vnode_lock(vp);
3256        if ((vp->v_type != VREG) && (vp->v_usecount >  refcnt)) {
3257                retval = 1;
3258                goto out;
3259        }
3260        if (vp->v_type == VREG)  {
3261                retval = ubc_isinuse_locked(vp, refcnt, 1);
3262        }
3263                
3264out:
3265        if (!locked)
3266                vnode_unlock(vp);
3267        return(retval);
3268}
3269
3270
3271/* resume vnode_t */
3272errno_t 
3273vnode_resume(vnode_t vp)
3274{
3275
3276        vnode_lock(vp);
3277
3278        if (vp->v_owner == current_thread()) {
3279                vp->v_lflag &= ~VL_SUSPENDED;
3280                vp->v_owner = 0;
3281                vnode_unlock(vp);
3282                wakeup(&vp->v_iocount);
3283        } else
3284                vnode_unlock(vp);
3285
3286        return(0);
3287}
3288
3289static errno_t 
3290vnode_drain(vnode_t vp)
3291{
3292        
3293        if (vp->v_lflag & VL_DRAIN) {
3294                panic("vnode_drain: recursuve drain");
3295                return(ENOENT);
3296        }
3297        vp->v_lflag |= VL_DRAIN;
3298        vp->v_owner = current_thread();
3299
3300        while (vp->v_iocount > 1)
3301                msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_drain", 0);
3302        return(0);
3303}
3304
3305
/*
 * If the number of recent references via vnode_getwithvid or vnode_getwithref
 * reaches this threshold, then 'UN-AGE' the vnode by removing it from
 * the LRU list if it's currently on it.  Once the iocount and usecount both
 * drop to 0, it will get put back on the end of the list, effectively making
 * it younger.  This allows us to keep actively referenced vnodes in the list
 * without having to constantly remove and add to the list each time a vnode
 * w/o a usecount is referenced, which would cost us taking and dropping a
 * global lock twice.
 */
3315#define UNAGE_THRESHHOLD        10
3316
3317errno_t
3318vnode_getiocount(vnode_t vp, int locked, int vid, int vflags)
3319{
3320        int nodead = vflags & VNODE_NODEAD;
3321        int nosusp = vflags & VNODE_NOSUSPEND;
3322
3323        if (!locked)
3324                vnode_lock(vp);
3325
3326        for (;;) {
3327                /*
3328                 * if it is a dead vnode with deadfs
3329                 */
3330                if (nodead && (vp->v_lflag & VL_DEAD) && ((vp->v_type == VBAD) || (vp->v_data == 0))) {
3331                        if (!locked)
3332                                vnode_unlock(vp);
3333                        return(ENOENT);
3334                }
3335                /*
3336                 * will return VL_DEAD ones
3337                 */
3338                if ((vp->v_lflag & (VL_SUSPENDED | VL_DRAIN | VL_TERMINATE)) == 0 ) {
3339                        break;
3340                }
3341                /*
3342                 * if suspended vnodes are to be failed
3343                 */
3344                if (nosusp && (vp->v_lflag & VL_SUSPENDED)) {
3345                        if (!locked)
3346                                vnode_unlock(vp);
3347                        return(ENOENT);
3348                }
3349                /*
3350                 * if you are the owner of drain/suspend/termination , can acquire iocount
3351                 * check for VL_TERMINATE; it does not set owner
3352                 */
3353                if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED | VL_TERMINATE)) &&
3354                    (vp->v_owner == current_thread())) {
3355                        break;
3356                }
3357                if (vp->v_lflag & VL_TERMINATE) {
3358                        vp->v_lflag |= VL_TERMWANT;
3359
3360                        msleep(&vp->v_lflag,   &vp->v_lock, PVFS, "vnode getiocount", 0);
3361                } else
3362                        msleep(&vp->v_iocount, &vp->v_lock, PVFS, "vnode_getiocount", 0);
3363        }
3364        if (vid != vp->v_id) {
3365                if (!locked)
3366                        vnode_unlock(vp);
3367                return(ENOENT);
3368        }
3369        if (++vp->v_references >= UNAGE_THRESHHOLD) {
3370                vp->v_references = 0;
3371                vnode_list_remove(vp);
3372        }
3373        vp->v_iocount++;
3374#ifdef JOE_DEBUG
3375        record_vp(vp, 1);
3376#endif
3377        if (!locked)
3378                vnode_unlock(vp);
3379        return(0);      
3380}
3381
3382static void
3383vnode_dropiocount (vnode_t vp, int locked)
3384{
3385        if (!locked)
3386                vnode_lock(vp);
3387        if (vp->v_iocount < 1)
3388                panic("vnode_dropiocount(%x): v_iocount < 1", vp);
3389
3390        vp->v_iocount--;
3391#ifdef JOE_DEBUG
3392        record_vp(vp, -1);
3393#endif
3394        if ((vp->v_lflag & (VL_DRAIN | VL_SUSPENDED)) && (vp->v_iocount <= 1))
3395                wakeup(&vp->v_iocount);
3396
3397        if (!locked)
3398                vnode_unlock(vp);
3399}
3400
3401
/*
 * Reclaim a vnode: the unlocked, non-reuse form of
 * vnode_reclaim_internal().
 */
void
vnode_reclaim(struct vnode * vp)
{
	const int locked = 0;
	const int reuse = 0;

	vnode_reclaim_internal(vp, locked, reuse);
}
3407
3408__private_extern__
3409void
3410vnode_reclaim_internal(struct vnode * vp, int locked, int reuse)
3411{
3412        int isfifo = 0;
3413
3414        if (!locked)
3415                vnode_lock(vp);
3416
3417        if (vp->v_lflag & VL_TERMINATE) {
3418                panic("vnode reclaim in progress");
3419        }
3420        vp->v_lflag |= VL_TERMINATE;
3421
3422        if (vnode_drain(vp)) {
3423                panic("vnode drain failed");
3424                vnode_unlock(vp);
3425                return;
3426        }
3427        isfifo = (vp->v_type == VFIFO);
3428
3429        if (vp->v_type != VBAD)
3430                vgone(vp);              /* clean and reclaim the vnode */
3431
3432        /*
3433         * give the vnode a new identity so
3434         * that vnode_getwithvid will fail
3435         * on any stale cache accesses
3436         */
3437        vp->v_id++;
3438        if (isfifo) {
3439                struct fifoinfo * fip;
3440
3441                fip = vp->v_fifoinfo;
3442                vp->v_fifoinfo = NULL;
3443                FREE(fip, M_TEMP);
3444        }
3445
3446        vp->v_type = VBAD;
3447
3448        if (vp->v_data)
3449                panic("vnode_reclaim_internal: cleaned vnode isn't");
3450        if (vp->v_numoutput)
3451                panic("vnode_reclaim_internal: Clean vnode has pending I/O's");
3452        if (UBCINFOEXISTS(vp))
3453                panic("vnode_reclaim_internal: ubcinfo not cleaned");
3454        if (vp->v_parent)
3455                panic("vnode_reclaim_internal: vparent not removed");
3456        if (vp->v_name)
3457                panic("vnode_reclaim_internal: vname not removed");
3458
3459        vp->v_socket = 0;
3460
3461        vp->v_lflag &= ~VL_TERMINATE;
3462        vp->v_lflag &= ~VL_DRAIN;
3463        vp->v_owner = 0;
3464
3465        if (vp->v_lflag & VL_TERMWANT) {
3466                vp->v_lflag &= ~VL_TERMWANT;
3467                wakeup(&vp->v_lflag);
3468        }
3469        if (!reuse && vp->v_usecount == 0)
3470                vnode_list_add(vp);
3471        if (!locked)
3472                vnode_unlock(vp);
3473}
3474
/* USAGE:
 * The following API creates a vnode, associates with it all the parameters specified in the
 * vnode_fsparam structure, and returns a vnode handle with a reference. Device aliasing is
 * handled here, so checkalias is obsoleted by this.
 *  vnode_create(int flavor, size_t size, void *param, vnode_t *vp)
 */
3481int  
3482vnode_create(int flavor, size_t size, void *data, vnode_t *vpp)
3483{
3484        int error;
3485        int insert = 1;
3486        vnode_t vp;
3487        vnode_t nvp;
3488        vnode_t dvp;
3489        struct componentname *cnp;
3490        struct vnode_fsparam *param = (struct vnode_fsparam *)data;
3491        
3492        if (flavor == VNCREATE_FLAVOR && (size == VCREATESIZE) && param) {
3493                if ( (error = new_vnode(&vp)) ) {
3494                        return(error);
3495                } else {
3496                        dvp = param->vnfs_dvp;
3497                        cnp = param->vnfs_cnp;
3498
3499                        vp->v_op = param->vnfs_vops;
3500                        vp->v_type = param->vnfs_vtype;
3501                        vp->v_data = param->vnfs_fsnode;
3502                        vp->v_iocount = 1;
3503
3504                        if (param->vnfs_markroot)
3505                                vp->v_flag |= VROOT;
3506                        if (param->vnfs_marksystem)
3507                                vp->v_flag |= VSYSTEM;
3508                        else if (vp->v_type == VREG) {
3509                                /*
3510                                 * only non SYSTEM vp
3511                                 */
3512                                error = ubc_info_init_withsize(vp, param->vnfs_filesize);
3513                                if (error) {
3514#ifdef JOE_DEBUG
3515                                        record_vp(vp, 1);
3516#endif
3517                                        vp->v_mount = 0;
3518                                        vp->v_op = dead_vnodeop_p;
3519                                        vp->v_tag = VT_NON;
3520                                        vp->v_data = NULL;
3521                                        vp->v_type = VBAD;
3522                                        vp->v_lflag |= VL_DEAD;
3523
3524                                        vnode_put(vp);
3525                                        return(error);
3526                                }
3527                        }
3528#ifdef JOE_DEBUG
3529                        record_vp(vp, 1);
3530#endif
3531                        if (vp->v_type == VCHR || vp->v_type == VBLK) {
3532                
3533                                if ( (nvp = checkalias(vp, param->vnfs_rdev)) ) {
3534                                        /*
3535                                         * if checkalias returns a vnode, it will be locked
3536                                         *
3537                                         * first get rid of the unneeded vnode we acquired
3538                                         */
3539                                        vp->v_data = NULL;
3540                                        vp->v_op = spec_vnodeop_p;
3541                                        vp->v_type = VBAD;
3542                                        vp->v_lflag = VL_DEAD;
3543                                        vp->v_data = NULL; 
3544                                        vp->v_tag = VT_NON;
3545                                        vnode_put(vp);
3546
3547                                        /*
3548                                         * switch to aliased vnode and finish
3549                                         * preparing it
3550                                         */
3551                                        vp = nvp;
3552
3553                                        vclean(vp, 0, current_proc());
3554                                        vp->v_op = param->vnfs_vops;
3555                                        vp->v_type = param->vnfs_vtype;
3556                                        vp->v_data = param->vnfs_fsnode;
3557                                        vp->v_lflag = 0;
3558                                        vp->v_mount = NULL;
3559                                        insmntque(vp, param->vnfs_mp);
3560                                        insert = 0;
3561                                        vnode_unlock(vp);
3562                                }
3563                        }
3564
3565                        if (vp->v_type == VFIFO) {
3566                                struct fifoinfo *fip;
3567
3568                                MALLOC(fip, struct fifoinfo *,
3569                                        sizeof(*fip), M_TEMP, M_WAITOK);
3570                                bzero(fip, sizeof(struct fifoinfo ));
3571                                vp->v_fifoinfo = fip;
3572                        }
                        /*
                         * File systems usually pass the address of the location where
                         * they store the vnode pointer.  Once we add the vnode to the mount
                         * point and the name cache it becomes discoverable, so the file system
                         * node will already have its connection to the vnode set up by then.
                         */
3578                        *vpp = vp;
3579
3580                        if (param->vnfs_mp) {
3581                                        if (param->vnfs_mp->mnt_kern_flag & MNTK_LOCK_LOCAL)
3582                                                vp->v_flag |= VLOCKLOCAL;
3583                                if (insert) {
3584                                        /*
3585                                         * enter in mount vnode list
3586                                         */
3587                                        insmntque(vp, param->vnfs_mp);
3588                                }
3589#ifdef INTERIM_FSNODE_LOCK      
3590                                if (param->vnfs_mp->mnt_vtable->vfc_threadsafe == 0) {
3591                                        MALLOC_ZONE(vp->v_unsafefs, struct unsafe_fsnode *,
3592                                                    sizeof(struct unsafe_fsnode), M_UNSAFEFS, M_WAITOK);
3593                                        vp->v_unsafefs->fsnode_count = 0;
3594                                        vp->v_unsafefs->fsnodeowner  = (void *)NULL;
3595                                        lck_mtx_init(&vp->v_unsafefs->fsnodelock, vnode_lck_grp, vnode_lck_attr);
3596                                }
3597#endif /* INTERIM_FSNODE_LOCK */
3598                        }
3599                        if (dvp && vnode_ref(dvp) == 0) {
3600                                vp->v_parent = dvp;
3601                        }
3602                        if (cnp) {
3603                                if (dvp && ((param->vnfs_flags & (VNFS_NOCACHE | VNFS_CANTCACHE)) == 0)) {
3604                                        /*
3605                                         * enter into name cache
3606                                         * we've got the info to enter it into the name cache now
3607                                         */
3608                                        cache_enter(dvp, vp, cnp);
3609                                }
3610                                vp->v_name = vfs_addname(cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_hash, 0);
3611                        }
3612                        if ((param->vnfs_flags & VNFS_CANTCACHE) == 0) {
3613                                /*
3614                                 * this vnode is being created as cacheable in the name cache
3615                                 * this allows us to re-enter it in the cache
3616                                 */
3617                                vp->v_flag |= VNCACHEABLE;
3618                        }
3619                        if ((vp->v_flag & VSYSTEM) && (vp->v_type != VREG))
3620                                panic("incorrect vnode setup");
3621
3622                        return(0);
3623                }
3624        }
3625        return (EINVAL);
3626}
3627
3628int
3629vnode_addfsref(vnode_t vp)
3630{
3631        vnode_lock(vp);
3632        if (vp->v_lflag & VNAMED_FSHASH)
3633                panic("add_fsref: vp already has named reference");
3634        if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb))
3635                panic("addfsref: vp on the free list\n");
3636        vp->v_lflag |= VNAMED_FSHASH;
3637        vnode_unlock(vp);
3638        return(0);
3639
3640}
3641int
3642vnode_removefsref(vnode_t vp)
3643{
3644        vnode_lock(vp);
3645        if ((vp->v_lflag & VNAMED_FSHASH) == 0)
3646                panic("remove_fsref: no named reference");
3647        vp->v_lflag &= ~VNAMED_FSHASH;
3648        vnode_unlock(vp);
3649        return(0);
3650
3651}
3652
3653
3654int
3655vfs_iterate(__unused int flags, int (*callout)(mount_t, void *), void *arg)
3656{
3657        mount_t mp;
3658        int ret = 0;
3659        fsid_t * fsid_list;
3660        int count, actualcount,  i;
3661        void * allocmem;
3662
3663        count = mount_getvfscnt();
3664        count += 10;
3665
3666        fsid_list = (fsid_t *)kalloc(count * sizeof(fsid_t));
3667        allocmem = (void *)fsid_list;
3668
3669        actualcount = mount_fillfsids(fsid_list, count);
3670
3671        for (i=0; i< actualcount; i++) {
3672
3673                /* obtain the mount point with iteration reference */
3674                mp = mount_list_lookupby_fsid(&fsid_list[i], 0, 1);
3675
3676                if(mp == (struct mount *)0)
3677                        continue;
3678                mount_lock(mp);
3679                if (mp->mnt_lflag & (MNT_LDEAD | MNT_LUNMOUNT)) {
3680                        mount_unlock(mp);
3681                        mount_iterdrop(mp);
3682                        continue;
3683                
3684                }
3685                mount_unlock(mp);
3686
3687                /* iterate over all the vnodes */
3688                ret = callout(mp, arg);
3689
3690                mount_iterdrop(mp);
3691
3692                switch (ret) {
3693                case VFS_RETURNED:
3694                case VFS_RETURNED_DONE:
3695                        if (ret == VFS_RETURNED_DONE) {
3696                                ret = 0;
3697                                goto out;
3698                        }
3699                        break;
3700
3701                case VFS_CLAIMED_DONE:
3702                        ret = 0;
3703                        goto out;
3704                case VFS_CLAIMED:
3705                default:
3706                        break;
3707                }
3708                ret = 0;
3709        }
3710
3711out:
3712        kfree(allocmem, (count * sizeof(fsid_t)));
3713        return (ret);
3714}
3715
3716/*
3717 * Update the vfsstatfs structure in the mountpoint.
3718 */
3719int
3720vfs_update_vfsstat(mount_t mp, vfs_context_t ctx)
3721{
3722        struct vfs_attr va;
3723        int             error;
3724
3725        /*
3726         * Request the attributes we want to propagate into
3727         * the per-mount vfsstat structure.
3728         */
3729        VFSATTR_INIT(&va);
3730        VFSATTR_WANTED(&va, f_iosize);
3731        VFSATTR_WANTED(&va, f_blocks);
3732        VFSATTR_WANTED(&va, f_bfree);
3733        VFSATTR_WANTED(&va, f_bavail);
3734        VFSATTR_WANTED(&va, f_bused);
3735        VFSATTR_WANTED(&va, f_files);
3736        VFSATTR_WANTED(&va, f_ffree);
3737        VFSATTR_WANTED(&va, f_bsize);
3738        VFSATTR_WANTED(&va, f_fssubtype);
3739        if ((error = vfs_getattr(mp, &va, ctx)) != 0) {
3740                KAUTH_DEBUG("STAT - filesystem returned error %d", error);
3741                return(error);
3742        }
3743
3744        /*
3745         * Unpack into the per-mount structure.
3746         *
3747         * We only overwrite these fields, which are likely to change:
3748         *      f_blocks
3749         *      f_bfree
3750         *      f_bavail
3751         *      f_bused
3752         *      f_files
3753         *      f_ffree
3754         *
3755         * And these which are not, but which the FS has no other way
3756         * of providing to us:
3757         *      f_bsize
3758         *      f_iosize
3759         *      f_fssubtype
3760         *
3761         */
3762        if (VFSATTR_IS_SUPPORTED(&va, f_bsize)) {
3763                mp->mnt_vfsstat.f_bsize = va.f_bsize;
3764        } else {
3765                mp->mnt_vfsstat.f_bsize = mp->mnt_devblocksize; /* default from the device block size */
3766        }
3767        if (VFSATTR_IS_SUPPORTED(&va, f_iosize)) {
3768                mp->mnt_vfsstat.f_iosize = va.f_iosize;
3769        } else {
3770                mp->mnt_vfsstat.f_iosize = 1024 * 1024;         /* 1MB sensible I/O size */
3771        }
3772        if (VFSATTR_IS_SUPPORTED(&va, f_blocks))
3773                mp->mnt_vfsstat.f_blocks = va.f_blocks;
3774        if (VFSATTR_IS_SUPPORTED(&va, f_bfree))
3775                mp->mnt_vfsstat.f_bfree = va.f_bfree;
3776        if (VFSATTR_IS_SUPPORTED(&va, f_bavail))
3777                mp->mnt_vfsstat.f_bavail = va.f_bavail;
3778        if (VFSATTR_IS_SUPPORTED(&va, f_bused))
3779                mp->mnt_vfsstat.f_bused = va.f_bused;
3780        if (VFSATTR_IS_SUPPORTED(&va, f_files))
3781                mp->mnt_vfsstat.f_files = va.f_files;
3782        if (VFSATTR_IS_SUPPORTED(&va, f_ffree))
3783                mp->mnt_vfsstat.f_ffree = va.f_ffree;
3784
3785        /* this is unlikely to change, but has to be queried for */
3786        if (VFSATTR_IS_SUPPORTED(&va, f_fssubtype))
3787                mp->mnt_vfsstat.f_fssubtype = va.f_fssubtype;
3788
3789        return(0);
3790}
3791
3792void 
3793mount_list_add(mount_t mp)
3794{
3795        mount_list_lock();
3796        TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);    
3797        nummounts++;
3798        mount_list_unlock();
3799}
3800
3801void
3802mount_list_remove(mount_t mp)
3803{
3804        mount_list_lock();
3805        TAILQ_REMOVE(&mountlist, mp, mnt_list);
3806        nummounts--;
3807        mp->mnt_list.tqe_next = 0;
3808        mp->mnt_list.tqe_prev = 0;
3809        mount_list_unlock();
3810}
3811
3812mount_t
3813mount_lookupby_volfsid(int volfs_id, int withref)
3814{
3815        mount_t cur_mount = (mount_t)0;
3816        mount_t mp ;
3817
3818        mount_list_lock();
3819        TAILQ_FOREACH(mp, &mountlist, mnt_list) { 
3820                if (validfsnode(mp) && mp->mnt_vfsstat.f_fsid.val[0] == volfs_id) {
3821            cur_mount = mp;
3822                        if (withref) {
3823                                if (mount_iterref(cur_mount, 1))  {
3824                                        cur_mount = (mount_t)0;
3825                                        mount_list_unlock();
3826                                        goto out;
3827                                }
3828                        }
3829            break;
3830          }
3831        }
3832        mount_list_unlock();
3833        if (withref && (cur_mount != (mount_t)0)) {
3834                mp = cur_mount;
3835                if (vfs_busy(mp, LK_NOWAIT) != 0) {
3836                        cur_mount = (mount_t)0;
3837        } 
3838                mount_iterdrop(mp);
3839        }
3840out:
3841        return(cur_mount);
3842}
3843
3844
3845mount_t 
3846mount_list_lookupby_fsid(fsid, locked, withref)
3847        fsid_t *fsid;
3848        int locked;
3849        int withref;
3850{
3851        mount_t retmp = (mount_t)0;
3852        mount_t mp;
3853
3854        if (!locked)
3855                mount_list_lock();
3856        TAILQ_FOREACH(mp, &mountlist, mnt_list) 
3857                if (mp->mnt_vfsstat.f_fsid.val[0] == fsid->val[0] &&
3858                    mp->mnt_vfsstat.f_fsid.val[1] == fsid->val[1]) {
3859                        retmp = mp;
3860                        if (withref) {
3861                                if (mount_iterref(retmp, 1)) 
3862                                        retmp = (mount_t)0;
3863                        }
3864                        goto out;
3865                }
3866out:
3867        if (!locked)
3868                mount_list_unlock();
3869        return (retmp);
3870}
3871
3872errno_t
3873vnode_lookup(const char *path, int flags, vnode_t *vpp, vfs_context_t context)
3874{
3875        struct nameidata nd;
3876        int error;
3877        struct vfs_context context2;
3878        vfs_context_t ctx = context;
3879        u_long ndflags = 0;
3880
3881        if (context == NULL) {          /* XXX technically an error */
3882                context2.vc_proc = current_proc();
3883                context2.vc_ucred = kauth_cred_get();
3884                ctx = &context2;
3885        }
3886
3887        if (flags & VNODE_LOOKUP_NOFOLLOW)
3888                ndflags = NOFOLLOW;
3889        else
3890                ndflags = FOLLOW;
3891
3892        if (flags & VNODE_LOOKUP_NOCROSSMOUNT)
3893                ndflags |= NOCROSSMOUNT;
3894        if (flags & VNODE_LOOKUP_DOWHITEOUT)
3895                ndflags |= DOWHITEOUT;
3896
3897        /* XXX AUDITVNPATH1 needed ? */
3898        NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
3899
3900        if ((error = namei(&nd)))
3901                return (error);
3902        *vpp = nd.ni_vp;
3903        nameidone(&nd);
3904        
3905        return (0);
3906}
3907
3908errno_t
3909vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_context_t context)
3910{
3911        struct nameidata nd;
3912        int error;
3913        struct vfs_context context2;
3914        vfs_context_t ctx = context;
3915        u_long ndflags = 0;
3916        int lflags = flags;
3917
3918        if (context == NULL) {          /* XXX technically an error */
3919                context2.vc_proc = current_proc();
3920                context2.vc_ucred = kauth_cred_get();
3921                ctx = &context2;
3922        }
3923
3924        if (fmode & O_NOFOLLOW)
3925                lflags |= VNODE_LOOKUP_NOFOLLOW;
3926
3927        if (lflags & VNODE_LOOKUP_NOFOLLOW)
3928                ndflags = NOFOLLOW;
3929        else
3930                ndflags = FOLLOW;
3931
3932        if (lflags & VNODE_LOOKUP_NOCROSSMOUNT)
3933                ndflags |= NOCROSSMOUNT;
3934        if (lflags & VNODE_LOOKUP_DOWHITEOUT)
3935                ndflags |= DOWHITEOUT;
3936        
3937        /* XXX AUDITVNPATH1 needed ? */
3938        NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);
3939
3940        if ((error = vn_open(&nd, fmode, cmode)))
3941                *vpp = NULL;
3942        else
3943                *vpp = nd.ni_vp;
3944        
3945        return (error);
3946}
3947
3948errno_t
3949vnode_close(vnode_t vp, int flags, vfs_context_t context)
3950{
3951        kauth_cred_t cred;
3952        struct proc *p;
3953        int error;
3954
3955        if (context) {
3956                p = context->vc_proc;
3957                cred = context->vc_ucred;
3958        } else {
3959                p = current_proc();
3960                cred = kauth_cred_get();
3961        }
3962        
3963        error = vn_close(vp, flags, cred, p);
3964        vnode_put(vp);
3965        return (error);
3966}
3967
3968errno_t
3969vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
3970{
3971        struct vnode_attr       va;
3972        int                     error;
3973
3974        VATTR_INIT(&va);
3975        VATTR_WANTED(&va, va_data_size);
3976        error = vnode_getattr(vp, &va, ctx);
3977        if (!error)
3978                *sizep = va.va_data_size;
3979        return(error);
3980}
3981
3982errno_t
3983vnode_setsize(vnode_t vp, off_t size, int ioflag, vfs_context_t ctx)
3984{
3985        struct vnode_attr       va;
3986
3987        VATTR_INIT(&va);
3988        VATTR_SET(&va, va_data_size, size);
3989        va.va_vaflags = ioflag & 0xffff;
3990        return(vnode_setattr(vp, &va, ctx));
3991}
3992
3993errno_t
3994vn_create(vnode_t dvp, vnode_t *vpp, struct componentname *cnp, struct vnode_attr *vap, int flags, vfs_context_t ctx)
3995{
3996        kauth_acl_t oacl, nacl;
3997        int initial_acl;
3998        errno_t error;
3999        vnode_t vp = (vnode_t)0;
4000
4001        error = 0;
4002        oacl = nacl = NULL;
4003        initial_acl = 0;
4004
4005        KAUTH_DEBUG("%p    CREATE - '%s'", dvp, cnp->cn_nameptr);
4006
4007        /*
4008         * Handle ACL inheritance.
4009         */
4010        if (!(flags & VN_CREATE_NOINHERIT) && vfs_extendedsecurity(dvp->v_mount)) {
4011                /* save the original filesec */
4012                if (VATTR_IS_ACTIVE(vap, va_acl)) {
4013                        initial_acl = 1;
4014                        oacl = vap->va_acl;
4015                }
4016
4017                vap->va_acl = NULL;
4018                if ((error = kauth_acl_inherit(dvp,
4019                         oacl,
4020                         &nacl,
4021                         vap->va_type == VDIR,
4022                         ctx)) != 0) {
4023                        KAUTH_DEBUG("%p    CREATE - error %d processing inheritance", dvp, error);
4024                        return(error);
4025                }
4026
4027                /*
4028                 * If the generated ACL is NULL, then we can save ourselves some effort
4029                 * by clearing the active bit.
4030                 */
4031                if (nacl == NULL) {
4032                        VATTR_CLEAR_ACTIVE(vap, va_acl);
4033                } else {
4034                        VATTR_SET(vap, va_acl, nacl);
4035                }
4036        }
4037        
4038        /*
4039         * Check and default new attributes.
4040         * This will set va_uid, va_gid, va_mode and va_create_time at least, if the caller
4041         * hasn't supplied them.
4042         */
4043        if ((error = vnode_authattr_new(dvp, vap, flags & VN_CREATE_NOAUTH, ctx)) != 0) {
4044                KAUTH_DEBUG("%p    CREATE - error %d handing/defaulting attributes", dvp, error);
4045                goto out;
4046        }
4047
4048                
4049        /*
4050         * Create the requested node.
4051         */
4052        switch(vap->va_type) {
4053        case VREG:
4054                error = VNOP_CREATE(dvp, vpp, cnp, vap, ctx);
4055                break;
4056        case VDIR:
4057                error = VNOP_MKDIR(dvp, vpp, cnp, vap, ctx);
4058                break;
4059        case VSOCK:
4060        case VFIFO:
4061        case VBLK:
4062        case VCHR:
4063                error = VNOP_MKNOD(dvp, vpp, cnp, vap, ctx);
4064                break;
4065        default:
4066                panic("vnode_create: unknown vtype %d", vap->va_type);
4067        }
4068        if (error != 0) {
4069                KAUTH_DEBUG("%p    CREATE - error %d returned by filesystem", dvp, error);
4070                goto out;
4071        }
4072
4073        vp = *vpp;
4074        /*
4075         * If some of the requested attributes weren't handled by the VNOP,
4076         * use our fallback code.
4077         */
4078        if (!VATTR_ALL_SUPPORTED(vap) && *vpp) {
4079                KAUTH_DEBUG("     CREATE - doing fallback with ACL %p", vap->va_acl);
4080                error = vnode_setattr_fallback(*vpp, vap, ctx);
4081        }
4082        if ((error != 0 ) && (vp != (vnode_t)0)) {
4083                *vpp = (vnode_t) 0;
4084                vnode_put(vp);
4085        }
4086
4087out:
4088        /*
4089         * If the caller supplied a filesec in vap, it has been replaced
4090         * now by the post-inheritance copy.  We need to put the original back
4091         * and free the inherited product.
4092         */
4093        if (initial_acl) {
4094                VATTR_SET(vap, va_acl, oacl);
4095        } else {
4096                VATTR_CLEAR_ACTIVE(vap, va_acl);
4097        }
4098        if (nacl != NULL)
4099                kauth_acl_free(nacl);
4100
4101        return(error);
4102}
4103
4104static kauth_scope_t    vnode_scope;
4105static int      vnode_authorize_callback(kauth_cred_t credential, __unused void *idata, kauth_action_t action,
4106    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3);
4107
4108typedef struct _vnode_authorize_context {
4109        vnode_t         vp;
4110        struct vnode_attr *vap;
4111        vnode_t         dvp;
4112        struct vnode_attr *dvap;
4113        vfs_context_t   ctx;
4114        int             flags;
4115        int             flags_valid;
4116#define _VAC_IS_OWNER           (1<<0)
4117#define _VAC_IN_GROUP           (1<<1)
4118#define _VAC_IS_DIR_OWNER       (1<<2)
4119#define _VAC_IN_DIR_GROUP       (1<<3)
4120} *vauth_ctx;
4121
4122void
4123vnode_authorize_init(void)
4124{
4125        vnode_scope = kauth_register_scope(KAUTH_SCOPE_VNODE, vnode_authorize_callback, NULL);
4126}
4127
4128/*
4129 * Authorize an operation on a vnode.
4130 *
4131 * This is KPI, but here because it needs vnode_scope.
4132 */
4133int
4134vnode_authorize(vnode_t vp, vnode_t dvp, kauth_action_t action, vfs_context_t context)
4135{
4136        int     error, result;
4137
4138        /*
4139         * We can't authorize against a dead vnode; allow all operations through so that
4140         * the correct error can be returned.
4141         */
4142        if (vp->v_type == VBAD)
4143                return(0);
4144        
4145        error = 0;
4146        result = kauth_authorize_action(vnode_scope, vfs_context_ucred(context), action,
4147                   (uintptr_t)context, (uintptr_t)vp, (uintptr_t)dvp, (uintptr_t)&error);
4148        if (result == EPERM)            /* traditional behaviour */
4149                result = EACCES;
4150        /* did the lower layers give a better error return? */
4151        if ((result != 0) && (error != 0))
4152                return(error);
4153        return(result);
4154}
4155
4156/*
4157 * Test for vnode immutability.
4158 *
4159 * The 'append' flag is set when the authorization request is constrained
4160 * to operations which only request the right to append to a file.
4161 *
4162 * The 'ignore' flag is set when an operation modifying the immutability flags
4163 * is being authorized.  We check the system securelevel to determine which
4164 * immutability flags we can ignore.
4165 */
4166static int
4167vnode_immutable(struct vnode_attr *vap, int append, int ignore)
4168{
4169        int     mask;
4170
4171        /* start with all bits precluding the operation */
4172        mask = IMMUTABLE | APPEND;
4173
4174        /* if appending only, remove the append-only bits */
4175        if (append)
4176                mask &= ~APPEND;
4177
4178        /* ignore only set when authorizing flags changes */
4179        if (ignore) {
4180                if (securelevel <= 0) {
4181                        /* in insecure state, flags do not inhibit changes */
4182                        mask = 0;
4183                } else {
4184                        /* in secure state, user flags don't inhibit */
4185                        mask &= ~(UF_IMMUTABLE | UF_APPEND);
4186                }
4187        }
4188        KAUTH_DEBUG("IMMUTABLE - file flags 0x%x mask 0x%x append = %d ignore = %d", vap->va_flags, mask, append, ignore);
4189        if ((vap->va_flags & mask) != 0)
4190                return(EPERM);
4191        return(0);
4192}
4193
4194static int
4195vauth_node_owner(struct vnode_attr *vap, kauth_cred_t cred)
4196{
4197        int result;
4198
4199        /* default assumption is not-owner */
4200        result = 0;
4201
4202        /*
4203         * If the filesystem has given us a UID, we treat this as authoritative.
4204         */
4205        if (vap && VATTR_IS_SUPPORTED(vap, va_uid)) {
4206                result = (vap->va_uid == kauth_cred_getuid(cred)) ? 1 : 0;
4207        }
4208        /* we could test the owner UUID here if we had a policy for it */
4209        
4210        return(result);
4211}
4212
4213static int
4214vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember)
4215{
4216        int     error;
4217        int     result;
4218
4219        error = 0;
4220        result = 0;
4221
4222        /* the caller is expected to have asked the filesystem for a group at some point */
4223        if (vap && VATTR_IS_SUPPORTED(vap, va_gid)) {
4224                error = kauth_cred_ismember_gid(cred, vap->va_gid, &result);
4225        }
4226        /* we could test the group UUID here if we had a policy for it */
4227
4228        if (!error)
4229                *ismember = result;
4230        return(error);
4231}
4232
4233static int
4234vauth_file_owner(vauth_ctx vcp)
4235{
4236        int result;
4237
4238        if (vcp->flags_valid & _VAC_IS_OWNER) {
4239                result = (vcp->flags & _VAC_IS_OWNER) ? 1 : 0;
4240        } else {
4241                result = vauth_node_owner(vcp->vap, vcp->ctx->vc_ucred);
4242
4243                /* cache our result */
4244                vcp->flags_valid |= _VAC_IS_OWNER;
4245                if (result) {
4246                        vcp->flags |= _VAC_IS_OWNER;
4247                } else {
4248                        vcp->flags &= ~_VAC_IS_OWNER;
4249                }
4250        }
4251        return(result);
4252}
4253
4254static int
4255vauth_file_ingroup(vauth_ctx vcp, int *ismember)
4256{
4257        int     error;
4258
4259        if (vcp->flags_valid & _VAC_IN_GROUP) {
4260                *ismember = (vcp->flags & _VAC_IN_GROUP) ? 1 : 0;
4261                error = 0;
4262        } else {
4263                error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember);
4264
4265                if (!error) {
4266                        /* cache our result */
4267                        vcp->flags_valid |= _VAC_IN_GROUP;
4268                        if (*ismember) {
4269                                vcp->flags |= _VAC_IN_GROUP;
4270                        } else {
4271                                vcp->flags &= ~_VAC_IN_GROUP;
4272                        }
4273                }
4274                
4275        }
4276        return(error);
4277}
4278
4279static int
4280vauth_dir_owner(vauth_ctx vcp)
4281{
4282        int result;
4283
4284        if (vcp->flags_valid & _VAC_IS_DIR_OWNER) {
4285                result = (vcp->flags & _VAC_IS_DIR_OWNER) ? 1 : 0;
4286        } else {
4287                result = vauth_node_owner(vcp->dvap, vcp->ctx->vc_ucred);
4288
4289                /* cache our result */
4290                vcp->flags_valid |= _VAC_IS_DIR_OWNER;
4291                if (result) {
4292                        vcp->flags |= _VAC_IS_DIR_OWNER;
4293                } else {
4294                        vcp->flags &= ~_VAC_IS_DIR_OWNER;
4295                }
4296        }
4297        return(result);
4298}
4299
4300static int
4301vauth_dir_ingroup(vauth_ctx vcp, int *ismember)
4302{
4303        int     error;
4304
4305        if (vcp->flags_valid & _VAC_IN_DIR_GROUP) {
4306                *ismember = (vcp->flags & _VAC_IN_DIR_GROUP) ? 1 : 0;
4307                error = 0;
4308        } else {
4309                error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember);
4310
4311                if (!error) {
4312                        /* cache our result */
4313                        vcp->flags_valid |= _VAC_IN_DIR_GROUP;
4314                        if (*ismember) {
4315                                vcp->flags |= _VAC_IN_DIR_GROUP;
4316                        } else {
4317                                vcp->flags &= ~_VAC_IN_DIR_GROUP;
4318                        }
4319                }
4320        }
4321        return(error);
4322}
4323
4324/*
4325 * Test the posix permissions in (vap) to determine whether (credential)
4326 * may perform (action)
4327 */
4328static int
4329vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir)
4330{
4331        struct vnode_attr *vap;
4332        int needed, error, owner_ok, group_ok, world_ok, ismember;
4333#ifdef KAUTH_DEBUG_ENABLE
4334        const char *where;
4335# define _SETWHERE(c)   where = c;
4336#else
4337# define _SETWHERE(c)
4338#endif
4339
4340        /* checking file or directory? */
4341        if (on_dir) {
4342                vap = vcp->dvap;
4343        } else {
4344                vap = vcp->vap;
4345        }
4346        
4347        error = 0;
4348        
4349        /*
4350         * We want to do as little work here as possible.  So first we check
4351         * which sets of permissions grant us the access we need, and avoid checking
4352         * whether specific permissions grant access when more generic ones would.
4353         */
4354
4355        /* owner permissions */
4356        needed = 0;
4357        if (action & VREAD)
4358                needed |= S_IRUSR;
4359        if (action & VWRITE)
4360                needed |= S_IWUSR;
4361        if (action & VEXEC)
4362                needed |= S_IXUSR;
4363        owner_ok = (needed & vap->va_mode) == needed;
4364
4365        /* group permissions */
4366        needed = 0;
4367        if (action & VREAD)
4368                needed |= S_IRGRP;
4369        if (action & VWRITE)
4370                needed |= S_IWGRP;
4371        if (action & VEXEC)
4372                needed |= S_IXGRP;
4373        group_ok = (needed & vap->va_mode) == needed;
4374
4375        /* world permissions */
4376        needed = 0;
4377        if (action & VREAD)
4378                needed |= S_IROTH;
4379        if (action & VWRITE)
4380                needed |= S_IWOTH;
4381        if (action & VEXEC)
4382                needed |= S_IXOTH;
4383        world_ok = (needed & vap->va_mode) == needed;
4384
4385        /* If granted/denied by all three, we're done */
4386        if (owner_ok && group_ok && world_ok) {
4387                _SETWHERE("all");
4388                goto out;
4389        }
4390        if (!owner_ok && !group_ok && !world_ok) {
4391                _SETWHERE("all");
4392                error = EACCES;
4393                goto out;
4394        }
4395
4396        /* Check ownership (relatively cheap) */
4397        if ((on_dir && vauth_dir_owner(vcp)) ||
4398            (!on_dir && vauth_file_owner(vcp))) {
4399                _SETWHERE("user");
4400                if (!owner_ok)
4401                        error = EACCES;
4402                goto out;
4403        }
4404
4405        /* Not owner; if group and world both grant it we're done */
4406        if (group_ok && world_ok) {
4407                _SETWHERE("group/world");
4408                goto out;
4409        }
4410        if (!group_ok && !world_ok) {
4411                _SETWHERE("group/world");
4412                error = EACCES;
4413                goto out;
4414        }
4415
4416        /* Check group membership (most expensive) */
4417        ismember = 0;
4418        if (on_dir) {
4419                error = vauth_dir_ingroup(vcp, &ismember);
4420        } else {
4421                error = vauth_file_ingroup(vcp, &ismember);
4422        }
4423        if (error)
4424                goto out;
4425        if (ismember) {
4426                _SETWHERE("group");
4427                if (!group_ok)
4428                        error = EACCES;
4429                goto out;
4430        }
4431
4432        /* Not owner, not in group, use world result */
4433        _SETWHERE("world");
4434        if (!world_ok)
4435                error = EACCES;
4436
4437        /* FALLTHROUGH */
4438
4439out:
4440        KAUTH_DEBUG("%p    %s - posix %s permissions : need %s%s%s %x have %s%s%s%s%s%s%s%s%s UID = %d file = %d,%d",
4441            vcp->vp, (error == 0) ? "ALLOWED" : "DENIED", where,
4442            (action & VREAD)  ? "r" : "-",
4443            (action & VWRITE) ? "w" : "-",
4444            (action & VEXEC)  ? "x" : "-",
4445            needed,
4446            (vap->va_mode & S_IRUSR) ? "r" : "-",
4447            (vap->va_mode & S_IWUSR) ? "w" : "-",
4448            (vap->va_mode & S_IXUSR) ? "x" : "-",
4449            (vap->va_mode & S_IRGRP) ? "r" : "-",
4450            (vap->va_mode & S_IWGRP) ? "w" : "-",
4451            (vap->va_mode & S_IXGRP) ? "x" : "-",
4452            (vap->va_mode & S_IROTH) ? "r" : "-",
4453            (vap->va_mode & S_IWOTH) ? "w" : "-",
4454            (vap->va_mode & S_IXOTH) ? "x" : "-",
4455            kauth_cred_getuid(vcp->ctx->vc_ucred),
4456            on_dir ? vcp->dvap->va_uid : vcp->vap->va_uid,
4457            on_dir ? vcp->dvap->va_gid : vcp->vap->va_gid);
4458        return(error);
4459}
4460
4461/*
4462 * Authorize the deletion of the node vp from the directory dvp.
4463 *
4464 * We assume that:
4465 * - Neither the node nor the directory are immutable.
4466 * - The user is not the superuser.
4467 *
4468 * Deletion is not permitted if the directory is sticky and the caller is not owner of the
4469 * node or directory.
4470 *
4471 * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be
4472 * deleted.  If neither denies the permission, and the caller has Posix write access to the
4473 * directory, then the node may be deleted.
4474 */
4475static int
4476vnode_authorize_delete(vauth_ctx vcp)
4477{
4478        struct vnode_attr       *vap = vcp->vap;
4479        struct vnode_attr       *dvap = vcp->dvap;
4480        kauth_cred_t            cred = vcp->ctx->vc_ucred;
4481        struct kauth_acl_eval   eval;
4482        int                     error, delete_denied, delete_child_denied, ismember;
4483
4484        /* check the ACL on the directory */
4485        delete_child_denied = 0;
4486        if (VATTR_IS_NOT(dvap, va_acl, NULL)) {
4487                eval.ae_requested = KAUTH_VNODE_DELETE_CHILD;
4488                eval.ae_acl = &dvap->va_acl->acl_ace[0];
4489                eval.ae_count = dvap->va_acl->acl_entrycount;
4490                eval.ae_options = 0;
4491                if (vauth_dir_owner(vcp))
4492                        eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
4493                if ((error = vauth_dir_ingroup(vcp, &ismember)) != 0)
4494                        return(error);
4495                if (ismember)
4496                        eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
4497                eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
4498                eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
4499                eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
4500                eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;
4501
4502                error = kauth_acl_evaluate(cred, &eval);
4503
4504                if (error != 0) {
4505                        KAUTH_DEBUG("%p    ERROR during ACL processing - %d", vcp->vp, error);
4506                        return(error);
4507                }
4508                if (eval.ae_result == KAUTH_RESULT_DENY)
4509                        delete_child_denied = 1;
4510                if (eval.ae_result == KAUTH_RESULT_ALLOW) {
4511                        KAUTH_DEBUG("%p    ALLOWED - granted by directory ACL", vcp->vp);
4512                        return(0);
4513                }
4514        }
4515
4516        /* check the ACL on the node */
4517        delete_denied = 0;
4518        if (VATTR_IS_NOT(vap, va_acl, NULL)) {
4519                eval.ae_requested = KAUTH_VNODE_DELETE;
4520                eval.ae_acl = &vap->va_acl->acl_ace[0];
4521                eval.ae_count = vap->va_acl->acl_entrycount;
4522                eval.ae_options = 0;
4523                if (vauth_file_owner(vcp))
4524                        eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
4525                if ((error = vauth_file_ingroup(vcp, &ismember)) != 0)
4526                        return(error);
4527                if (ismember)
4528                        eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
4529                eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
4530                eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
4531                eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
4532                eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;
4533
4534                if ((error = kauth_acl_evaluate(cred, &eval)) != 0) {
4535                        KAUTH_DEBUG("%p    ERROR during ACL processing - %d", vcp->vp, error);
4536                        return(error);
4537                }
4538                if (eval.ae_result == KAUTH_RESULT_DENY)
4539                        delete_denied = 1;
4540                if (eval.ae_result == KAUTH_RESULT_ALLOW) {
4541                        KAUTH_DEBUG("%p    ALLOWED - granted by file ACL", vcp->vp);
4542                        return(0);
4543                }
4544        }
4545
4546        /* if denied by ACL on directory or node, return denial */
4547        if (delete_denied || delete_child_denied) {
4548                KAUTH_DEBUG("%p    ALLOWED - denied by ACL", vcp->vp);
4549                return(EACCES);
4550        }
4551
4552        /* enforce sticky bit behaviour */
4553        if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
4554                KAUTH_DEBUG("%p    DENIED - sticky bit rules (user %d  file %d  dir %d)",
4555                    vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid);
4556                return(EACCES);
4557        }
4558
4559        /* check the directory */
4560        if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
4561                KAUTH_DEBUG("%p    ALLOWED - granted by posix permisssions", vcp->vp);
4562                return(error);
4563        }
4564
4565        /* not denied, must be OK */
4566        return(0);
4567}
4568        
4569
4570/*
4571 * Authorize an operation based on the node's attributes.
4572 */
4573static int
4574vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_rights_t preauth_rights)
4575{
4576        struct vnode_attr       *vap = vcp->vap;
4577        kauth_cred_t            cred = vcp->ctx->vc_ucred;
4578        struct kauth_acl_eval   eval;
4579        int                     error, ismember;
4580        mode_t                  posix_action;
4581
4582        /*
4583         * If we are the file owner, we automatically have some rights.
4584         *
4585         * Do we need to expand this to support group ownership?
4586         */
4587        if (vauth_file_owner(vcp))
4588                acl_rights &= ~(KAUTH_VNODE_WRITE_SECURITY);
4589
4590        /*
4591         * If we are checking both TAKE_OWNERSHIP and WRITE_SECURITY, we can
4592         * mask the latter.  If TAKE_OWNERSHIP is requested the caller is about to
4593         * change ownership to themselves, and WRITE_SECURITY is implicitly
4594         * granted to the owner.  We need to do this because at this point
4595         * WRITE_SECURITY may not be granted as the caller is not currently
4596         * the owner.
4597         */
4598        if ((acl_rights & KAUTH_VNODE_TAKE_OWNERSHIP) &&
4599            (acl_rights & KAUTH_VNODE_WRITE_SECURITY))
4600                acl_rights &= ~KAUTH_VNODE_WRITE_SECURITY;
4601        
4602        if (acl_rights == 0) {
4603                KAUTH_DEBUG("%p    ALLOWED - implicit or no rights required", vcp->vp);
4604                return(0);
4605        }
4606
4607        /* if we have an ACL, evaluate it */
4608        if (VATTR_IS_NOT(vap, va_acl, NULL)) {
4609                eval.ae_requested = acl_rights;
4610                eval.ae_acl = &vap->va_acl->acl_ace[0];
4611                eval.ae_count = vap->va_acl->acl_entrycount;
4612                eval.ae_options = 0;
4613                if (vauth_file_owner(vcp))
4614                        eval.ae_options |= KAUTH_AEVAL_IS_OWNER;
4615                if ((error = vauth_file_ingroup(vcp, &ismember)) != 0)
4616                        return(error);
4617                if (ismember)
4618                        eval.ae_options |= KAUTH_AEVAL_IN_GROUP;
4619                eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS;
4620                eval.ae_exp_gread = KAUTH_VNODE_GENERIC_READ_BITS;
4621                eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS;
4622                eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS;
4623                
4624                if ((error = kauth_acl_evaluate(cred, &eval)) != 0) {
4625                        KAUTH_DEBUG("%p    ERROR during ACL processing - %d", vcp->vp, error);
4626                        return(error);
4627                }
4628                
4629                if (eval.ae_result == KAUTH_RESULT_DENY) {
4630                        KAUTH_DEBUG("%p    DENIED - by ACL", vcp->vp);
4631                        return(EACCES);                 /* deny, deny, counter-allege */
4632                }
4633                if (eval.ae_result == KAUTH_RESULT_ALLOW) {
4634                        KAUTH_DEBUG("%p    ALLOWED - all rights granted by ACL", vcp->vp);
4635                        return(0);
4636                }
4637                /* fall through and evaluate residual rights */
4638        } else {
4639                /* no ACL, everything is residual */
4640                eval.ae_residual = acl_rights;
4641        }
4642
4643        /*
4644         * Grant residual rights that have been pre-authorized.
4645         */
4646        eval.ae_residual &= ~preauth_rights;
4647
4648        /*
4649         * We grant WRITE_ATTRIBUTES to the owner if it hasn't been denied.
4650         */
4651        if (vauth_file_owner(vcp))
4652                eval.ae_residual &= ~KAUTH_VNODE_WRITE_ATTRIBUTES;
4653        
4654        if (eval.ae_residual == 0) {
4655                KAUTH_DEBUG("%p    ALLOWED - rights already authorized", vcp->vp);
4656                return(0);
4657        }               
4658        
4659        /*
4660         * Bail if we have residual rights that can't be granted by posix permissions,
4661         * or aren't presumed granted at this point.
4662         *
4663         * XXX these can be collapsed for performance
4664         */
4665        if (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER) {
4666                KAUTH_DEBUG("%p    DENIED - CHANGE_OWNER not permitted", vcp->vp);
4667                return(EACCES);
4668        }
4669        if (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY) {
4670                KAUTH_DEBUG("%p    DENIED - WRITE_SECURITY not permitted", vcp->vp);
4671                return(EACCES);
4672        }
4673
4674#if DIAGNOSTIC
4675        if (eval.ae_residual & KAUTH_VNODE_DELETE)
4676                panic("vnode_authorize: can't be checking delete permission here");
4677#endif
4678
4679        /*
4680         * Compute the fallback posix permissions that will satisfy the remaining
4681         * rights.
4682         */
4683        posix_action = 0;
4684        if (eval.ae_residual & (KAUTH_VNODE_READ_DATA |
4685                KAUTH_VNODE_LIST_DIRECTORY |
4686                KAUTH_VNODE_READ_EXTATTRIBUTES))
4687                posix_action |= VREAD;
4688        if (eval.ae_residual & (KAUTH_VNODE_WRITE_DATA |
4689                KAUTH_VNODE_ADD_FILE |
4690                KAUTH_VNODE_ADD_SUBDIRECTORY |
4691                KAUTH_VNODE_DELETE_CHILD |
4692                KAUTH_VNODE_WRITE_ATTRIBUTES |
4693                KAUTH_VNODE_WRITE_EXTATTRIBUTES))
4694                posix_action |= VWRITE;
4695        if (eval.ae_residual & (KAUTH_VNODE_EXECUTE |
4696                KAUTH_VNODE_SEARCH))
4697                posix_action |= VEXEC;
4698        
4699        if (posix_action != 0) {
4700                return(vnode_authorize_posix(vcp, posix_action, 0 /* !on_dir */));
4701        } else {
4702                KAUTH_DEBUG("%p    ALLOWED - residual rights %s%s%s%s%s%s%s%s%s%s%s%s%s%s granted due to no posix mapping",
4703                    vcp->vp,
4704                    (eval.ae_residual & KAUTH_VNODE_READ_DATA)
4705                    ? vnode_isdir(vcp->vp) ? " LIST_DIRECTORY" : " READ_DATA" : "",
4706                    (eval.ae_residual & KAUTH_VNODE_WRITE_DATA)
4707                    ? vnode_isdir(vcp->vp) ? " ADD_FILE" : " WRITE_DATA" : "",
4708                    (eval.ae_residual & KAUTH_VNODE_EXECUTE)
4709                    ? vnode_isdir(vcp->vp) ? " SEARCH" : " EXECUTE" : "",
4710                    (eval.ae_residual & KAUTH_VNODE_DELETE)
4711                    ? " DELETE" : "",
4712                    (eval.ae_residual & KAUTH_VNODE_APPEND_DATA)
4713                    ? vnode_isdir(vcp->vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "",
4714                    (eval.ae_residual & KAUTH_VNODE_DELETE_CHILD)
4715                    ? " DELETE_CHILD" : "",
4716                    (eval.ae_residual & KAUTH_VNODE_READ_ATTRIBUTES)
4717                    ? " READ_ATTRIBUTES" : "",
4718                    (eval.ae_residual & KAUTH_VNODE_WRITE_ATTRIBUTES)
4719                    ? " WRITE_ATTRIBUTES" : "",
4720                    (eval.ae_residual & KAUTH_VNODE_READ_EXTATTRIBUTES)
4721                    ? " READ_EXTATTRIBUTES" : "",
4722                    (eval.ae_residual & KAUTH_VNODE_WRITE_EXTATTRIBUTES)
4723                    ? " WRITE_EXTATTRIBUTES" : "",
4724                    (eval.ae_residual & KAUTH_VNODE_READ_SECURITY)
4725                    ? " READ_SECURITY" : "",
4726                    (eval.ae_residual & KAUTH_VNODE_WRITE_SECURITY)
4727                    ? " WRITE_SECURITY" : "",
4728                    (eval.ae_residual & KAUTH_VNODE_CHECKIMMUTABLE)
4729                    ? " CHECKIMMUTABLE" : "",
4730                    (eval.ae_residual & KAUTH_VNODE_CHANGE_OWNER)
4731                    ? " CHANGE_OWNER" : "");
4732        }
4733
4734        /*
4735         * Lack of required Posix permissions implies no reason to deny access.
4736         */
4737        return(0);
4738}
4739
4740/*
4741 * Check for file immutability.
4742 */
4743static int
4744vnode_authorize_checkimmutable(vnode_t vp, struct vnode_attr *vap, int rights, int ignore)
4745{
4746        mount_t mp;
4747        int error;
4748        int append;
4749
4750        /*
4751         * Perform immutability checks for operations that change data.
4752         *
4753         * Sockets, fifos and devices require special handling.
4754         */
4755        switch(vp->v_type) {
4756        case VSOCK:
4757        case VFIFO:
4758        case VBLK:
4759        case VCHR:
4760                /*
4761                 * Writing to these nodes does not change the filesystem data,
4762                 * so forget that it's being tried.
4763                 */
4764                rights &= ~KAUTH_VNODE_WRITE_DATA;
4765                break;
4766        default:
4767                break;
4768        }
4769
4770        error = 0;
4771        if (rights & KAUTH_VNODE_WRITE_RIGHTS) {
4772                
4773                /* check per-filesystem options if possible */
4774                mp = vnode_mount(vp);
4775                if (mp != NULL) {
4776        
4777                        /* check for no-EA filesystems */
4778                        if ((rights & KAUTH_VNODE_WRITE_EXTATTRIBUTES) &&
4779                            (vfs_flags(mp) & MNT_NOUSERXATTR)) {
4780                                KAUTH_DEBUG("%p    DENIED - filesystem disallowed extended attributes", vp);
4781                                error = EACCES;  /* User attributes disabled */
4782                                goto out;
4783                        }
4784                }
4785
4786                /* check for file immutability */
4787                append = 0;
4788                if (vp->v_type == VDIR) {
4789                        if ((rights & (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY)) == rights)
4790                                append = 1;
4791                } else {
4792                        if ((rights & KAUTH_VNODE_APPEND_DATA) == rights)
4793                                append = 1;
4794                }
4795                if ((error = vnode_immutable(vap, append, ignore)) != 0) {
4796                        KAUTH_DEBUG("%p    DENIED - file is immutable", vp);
4797                        goto out;
4798                }
4799        }
4800out:
4801        return(error);
4802}
4803
4804/*
4805 * Handle authorization actions for filesystems that advertise that the server will
4806 * be enforcing.
4807 */
4808static int
4809vnode_authorize_opaque(vnode_t vp, int *resultp, kauth_action_t action, vfs_context_t ctx)
4810{
4811        int     error;
4812
4813        /*
4814         * If the vp is a device node, socket or FIFO it actually represents a local
4815         * endpoint, so we need to handle it locally.
4816         */
4817        switch(vp->v_type) {
4818        case VBLK:
4819        case VCHR:
4820        case VSOCK:
4821        case VFIFO:
4822                return(0);
4823        default:
4824                break;
4825        }
4826
4827        /*
4828         * In the advisory request case, if the filesystem doesn't think it's reliable
4829         * we will attempt to formulate a result ourselves based on VNOP_GETATTR data.
4830         */
4831        if ((action & KAUTH_VNODE_ACCESS) && !vfs_authopaqueaccess(vnode_mount(vp)))
4832                return(0);
4833
4834        /*
4835         * Let the filesystem have a say in the matter.  It's OK for it to not implemnent
4836         * VNOP_ACCESS, as most will authorise inline with the actual request.
4837         */
4838        if ((error = VNOP_ACCESS(vp, action, ctx)) != ENOTSUP) {
4839                *resultp = error;
4840                KAUTH_DEBUG("%p    DENIED - opaque filesystem VNOP_ACCESS denied access", vp);
4841                return(1);
4842        }
4843        
4844        /*
4845         * Typically opaque filesystems do authorisation in-line, but exec is a special case.  In
4846         * order to be reasonably sure that exec will be permitted, we try a bit harder here.
4847         */
4848        if ((action & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp)) {
4849                /* try a VNOP_OPEN for readonly access */
4850                if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
4851                        *resultp = error;
4852                        KAUTH_DEBUG("%p    DENIED - EXECUTE denied because file could not be opened readonly", vp);
4853                        return(1);
4854                }
4855                VNOP_CLOSE(vp, FREAD, ctx);
4856        }
4857
4858        /*
4859         * We don't have any reason to believe that the request has to be denied at this point,
4860         * so go ahead and allow it.
4861         */
4862        *resultp = 0;
4863        KAUTH_DEBUG("%p    ALLOWED - bypassing access check for non-local filesystem", vp);
4864        return(1);
4865}
4866
4867static int
4868vnode_authorize_callback(__unused kauth_cred_t unused_cred, __unused void *idata, kauth_action_t action,
4869    uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3)
4870{
4871        struct _vnode_authorize_context auth_context;
4872        vauth_ctx               vcp;
4873        vfs_context_t           ctx;
4874        vnode_t                 vp, dvp;
4875        kauth_cred_t            cred;
4876        kauth_ace_rights_t      rights;
4877        struct vnode_attr       va, dva;
4878        int                     result;
4879        int                     *errorp;
4880        int                     noimmutable;
4881
4882        vcp = &auth_context;
4883        ctx = vcp->ctx = (vfs_context_t)arg0;
4884        vp = vcp->vp = (vnode_t)arg1;
4885        dvp = vcp->dvp = (vnode_t)arg2;
4886        errorp = (int *)arg3;
4887        /* note that we authorize against the context, not the passed cred (the same thing anyway) */
4888        cred = ctx->vc_ucred;
4889
4890        VATTR_INIT(&va);
4891        vcp->vap = &va;
4892        VATTR_INIT(&dva);
4893        vcp->dvap = &dva;
4894
4895        vcp->flags = vcp->flags_valid = 0;
4896
4897#if DIAGNOSTIC
4898        if ((ctx == NULL) || (vp == NULL) || (cred == NULL))
4899                panic("vnode_authorize: bad arguments (context %p  vp %p  cred %p)", ctx, vp, cred);
4900#endif
4901
4902        KAUTH_DEBUG("%p  AUTH - %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s on %s '%s' (0x%x:%p/%p)",
4903            vp, vfs_context_proc(ctx)->p_comm,
4904            (action & KAUTH_VNODE_ACCESS)               ? "access" : "auth",
4905            (action & KAUTH_VNODE_READ_DATA)            ? vnode_isdir(vp) ? " LIST_DIRECTORY" : " READ_DATA" : "",
4906            (action & KAUTH_VNODE_WRITE_DATA)           ? vnode_isdir(vp) ? " ADD_FILE" : " WRITE_DATA" : "",
4907            (action & KAUTH_VNODE_EXECUTE)              ? vnode_isdir(vp) ? " SEARCH" : " EXECUTE" : "",
4908            (action & KAUTH_VNODE_DELETE)               ? " DELETE" : "",
4909            (action & KAUTH_VNODE_APPEND_DATA)          ? vnode_isdir(vp) ? " ADD_SUBDIRECTORY" : " APPEND_DATA" : "",
4910            (action & KAUTH_VNODE_DELETE_CHILD)         ? " DELETE_CHILD" : "",
4911            (action & KAUTH_VNODE_READ_ATTRIBUTES)      ? " READ_ATTRIBUTES" : "",
4912            (action & KAUTH_VNODE_WRITE_ATTRIBUTES)     ? " WRITE_ATTRIBUTES" : "",
4913            (action & KAUTH_VNODE_READ_EXTATTRIBUTES)   ? " READ_EXTATTRIBUTES" : "",
4914            (action & KAUTH_VNODE_WRITE_EXTATTRIBUTES)  ? " WRITE_EXTATTRIBUTES" : "",
4915            (action & KAUTH_VNODE_READ_SECURITY)        ? " READ_SECURITY" : "",
4916            (action & KAUTH_VNODE_WRITE_SECURITY)       ? " WRITE_SECURITY" : "",
4917            (action & KAUTH_VNODE_CHANGE_OWNER)         ? " CHANGE_OWNER" : "",
4918            (action & KAUTH_VNODE_NOIMMUTABLE)          ? " (noimmutable)" : "",
4919            vnode_isdir(vp) ? "directory" : "file",
4920            vp->v_name ? vp->v_name : "<NULL>", action, vp, dvp);
4921
4922        /*
4923         * Extract the control bits from the action, everything else is
4924         * requested rights.
4925         */
4926        noimmutable = (action & KAUTH_VNODE_NOIMMUTABLE) ? 1 : 0;
4927        rights = action & ~(KAUTH_VNODE_ACCESS | KAUTH_VNODE_NOIMMUTABLE);
4928 
4929        if (rights & KAUTH_VNODE_DELETE) {
4930#if DIAGNOSTIC
4931                if (dvp == NULL)
4932                        panic("vnode_authorize: KAUTH_VNODE_DELETE test requires a directory");
4933#endif
4934        } else {
4935                dvp = NULL;
4936        }
4937        
4938        /*
4939         * Check for read-only filesystems.
4940         */
4941        if ((rights & KAUTH_VNODE_WRITE_RIGHTS) &&
4942            (vp->v_mount->mnt_flag & MNT_RDONLY) &&
4943            ((vp->v_type == VREG) || (vp->v_type == VDIR) || 
4944             (vp->v_type == VLNK) || (vp->v_type == VCPLX) || 
4945             (rights & KAUTH_VNODE_DELETE) || (rights & KAUTH_VNODE_DELETE_CHILD))) {
4946                result = EROFS;
4947                goto out;
4948        }
4949
4950        /*
4951         * Check for noexec filesystems.
4952         */
4953        if ((rights & KAUTH_VNODE_EXECUTE) && vnode_isreg(vp) && (vp->v_mount->mnt_flag & MNT_NOEXEC)) {
4954                result = EACCES;
4955                goto out;
4956        }
4957
4958        /*
4959         * Handle cases related to filesystems with non-local enforcement.
4960         * This call can return 0, in which case we will fall through to perform a
4961         * check based on VNOP_GETATTR data.  Otherwise it returns 1 and sets
4962         * an appropriate result, at which point we can return immediately.
4963         */
4964        if (vfs_authopaque(vp->v_mount) && vnode_authorize_opaque(vp, &result, action, ctx))
4965                goto out;
4966
4967        /*
4968         * Get vnode attributes and extended security information for the vnode
4969         * and directory if required.
4970         */
4971        VATTR_WANTED(&va, va_mode);
4972        VATTR_WANTED(&va, va_uid);
4973        VATTR_WANTED(&va, va_gid);
4974        VATTR_WANTED(&va, va_flags);
4975        VATTR_WANTED(&va, va_acl);
4976        if ((result = vnode_getattr(vp, &va, ctx)) != 0) {
4977                KAUTH_DEBUG("%p    ERROR - failed to get vnode attributes - %d", vp, result);
4978                goto out;
4979        }
4980        if (dvp) {
4981                VATTR_WANTED(&dva, va_mode);
4982                VATTR_WANTED(&dva, va_uid);
4983                VATTR_WANTED(&dva, va_gid);
4984                VATTR_WANTED(&dva, va_flags);
4985                VATTR_WANTED(&dva, va_acl);
4986                if ((result = vnode_getattr(dvp, &dva, ctx)) != 0) {
4987                        KAUTH_DEBUG("%p    ERROR - failed to get directory vnode attributes - %d", vp, result);
4988                        goto out;
4989                }
4990        }
4991
4992        /*
4993         * If the vnode is an extended attribute data vnode (eg. a resource fork), *_DATA becomes
4994         * *_EXTATTRIBUTES.
4995         */
4996        if (S_ISXATTR(va.va_mode)) {
4997                if (rights & KAUTH_VNODE_READ_DATA) {
4998                        rights &= ~KAUTH_VNODE_READ_DATA;
4999                        rights |= KAUTH_VNODE_READ_EXTATTRIBUTES;
5000                }
5001                if (rights & KAUTH_VNODE_WRITE_DATA) {
5002                        rights &= ~KAUTH_VNODE_WRITE_DATA;
5003                        rights |= KAUTH_VNODE_WRITE_EXTATTRIBUTES;
5004                }
5005        }
5006        
5007        /*
5008         * Check for immutability.
5009         *
5010         * In the deletion case, parent directory immutability vetoes specific
5011         * file rights.
5012         */
5013        if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0)
5014                goto out;
5015        if ((rights & KAUTH_VNODE_DELETE) &&
5016            ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0))
5017                goto out;
5018
5019        /*
5020         * Clear rights that have been authorized by reaching this point, bail if nothing left to
5021         * check.
5022         */
5023        rights &= ~(KAUTH_VNODE_LINKTARGET | KAUTH_VNODE_CHECKIMMUTABLE);
5024        if (rights == 0)
5025                goto out;
5026
5027        /*
5028         * If we're not the superuser, authorize based on file properties.
5029         */
5030        if (!vfs_context_issuser(ctx)) {
5031                /* process delete rights */
5032                if ((rights & KAUTH_VNODE_DELETE) &&
5033                    ((result = vnode_authorize_delete(vcp)) != 0))
5034                    goto out;
5035
5036                /* process remaining rights */
5037                if ((rights & ~KAUTH_VNODE_DELETE) &&
5038                    ((result = vnode_authorize_simple(vcp, rights, rights & KAUTH_VNODE_DELETE)) != 0))
5039                        goto out;
5040        } else {
5041
5042                /*
5043                 * Execute is only granted to root if one of the x bits is set.  This check only
5044                 * makes sense if the posix mode bits are actually supported.
5045                 */
5046                if ((rights & KAUTH_VNODE_EXECUTE) &&
5047                    (vp->v_type == VREG) &&
5048                    VATTR_IS_SUPPORTED(&va, va_mode) &&
5049                    !(va.va_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
5050                        result = EPERM;
5051                        KAUTH_DEBUG("%p    DENIED - root execute requires at least one x bit in 0x%x", vp, va.va_mode);
5052                        goto out;
5053                }
5054                
5055                KAUTH_DEBUG("%p    ALLOWED - caller is superuser", vp);
5056        }
5057
5058out:
5059        if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL))
5060                kauth_acl_free(va.va_acl);
5061        if (VATTR_IS_SUPPORTED(&dva, va_acl) && (dva.va_acl != NULL))
5062                kauth_acl_free(dva.va_acl);
5063        if (result) {
5064                *errorp = result;
5065                KAUTH_DEBUG("%p    DENIED - auth denied", vp);
5066                return(KAUTH_RESULT_DENY);
5067        }
5068
5069        /*
5070         * Note that this implies that we will allow requests for no rights, as well as
5071         * for rights that we do not recognise.  There should be none of these.
5072         */
5073        KAUTH_DEBUG("%p    ALLOWED - auth granted", vp);
5074        return(KAUTH_RESULT_ALLOW);
5075}
5076
5077/*
5078 * Check that the attribute information in vattr can be legally applied to
5079 * a new file by the context.
5080 */
5081int
5082vnode_authattr_new(vnode_t dvp, struct vnode_attr *vap, int noauth, vfs_context_t ctx)
5083{
5084        int             error;
5085        int             is_suser, ismember, defaulted_owner, defaulted_group, defaulted_mode;
5086        kauth_cred_t    cred;
5087        guid_t          changer;
5088        mount_t         dmp;
5089
5090        error = 0;
5091        defaulted_owner = defaulted_group = defaulted_mode = 0;
5092
5093        /*
5094         * Require that the filesystem support extended security to apply any.
5095         */
5096        if (!vfs_extendedsecurity(dvp->v_mount) &&
5097            (VATTR_IS_ACTIVE(vap, va_acl) || VATTR_IS_ACTIVE(vap, va_uuuid) || VATTR_IS_ACTIVE(vap, va_guuid))) {
5098                error = EINVAL;
5099                goto out;
5100        }
5101        
5102        /*
5103         * Default some fields.
5104         */
5105        dmp = dvp->v_mount;
5106
5107        /*
5108         * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit owner is set, that
5109         * owner takes ownership of all new files.
5110         */
5111        if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsowner != KAUTH_UID_NONE)) {
5112                VATTR_SET(vap, va_uid, dmp->mnt_fsowner);
5113                defaulted_owner = 1;
5114        } else {
5115                if (!VATTR_IS_ACTIVE(vap, va_uid)) {
5116                        /* default owner is current user */
5117                        VATTR_SET(vap, va_uid, kauth_cred_getuid(vfs_context_ucred(ctx)));
5118                        defaulted_owner = 1;
5119                }
5120        }
5121
5122        /*
5123         * If the filesystem is mounted IGNORE_OWNERSHIP and an explicit grouo is set, that
5124         * group takes ownership of all new files.
5125         */
5126        if ((dmp->mnt_flag & MNT_IGNORE_OWNERSHIP) && (dmp->mnt_fsgroup != KAUTH_GID_NONE)) {
5127                VATTR_SET(vap, va_gid, dmp->mnt_fsgroup);
5128                defaulted_group = 1;
5129        } else {
5130                if (!VATTR_IS_ACTIVE(vap, va_gid)) {
5131                        /* default group comes from parent object, fallback to current user */
5132                        struct vnode_attr dva;
5133                        VATTR_INIT(&dva);
5134                        VATTR_WANTED(&dva, va_gid);
5135                        if ((error = vnode_getattr(dvp, &dva, ctx)) != 0)
5136                                goto out;
5137                        if (VATTR_IS_SUPPORTED(&dva, va_gid)) {
5138                                VATTR_SET(vap, va_gid, dva.va_gid);
5139                        } else {
5140                                VATTR_SET(vap, va_gid, kauth_cred_getgid(vfs_context_ucred(ctx)));
5141                        }
5142                        defaulted_group = 1;
5143                }
5144        }
5145
5146        if (!VATTR_IS_ACTIVE(vap, va_flags))
5147                VATTR_SET(vap, va_flags, 0);
5148        
5149        /* default mode is everything, masked with current umask */
5150        if (!VATTR_IS_ACTIVE(vap, va_mode)) {
5151                VATTR_SET(vap, va_mode, ACCESSPERMS & ~vfs_context_proc(ctx)->p_fd->fd_cmask);
5152                KAUTH_DEBUG("ATTR - defaulting new file mode to %o from umask %o", vap->va_mode, vfs_context_proc(ctx)->p_fd->fd_cmask);
5153                defaulted_mode = 1;
5154        }
5155        /* set timestamps to now */
5156        if (!VATTR_IS_ACTIVE(vap, va_create_time)) {
5157                nanotime(&vap->va_create_time);
5158                VATTR_SET_ACTIVE(vap, va_create_time);
5159        }
5160        
5161        /*
5162         * Check for attempts to set nonsensical fields.
5163         */
5164        if (vap->va_active & ~VNODE_ATTR_NEWOBJ) {
5165                error = EINVAL;
5166                KAUTH_DEBUG("ATTR - ERROR - attempt to set unsupported new-file attributes %llx",
5167                    vap->va_active & ~VNODE_ATTR_NEWOBJ);
5168                goto out;
5169        }
5170
5171        /*
5172         * Quickly check for the applicability of any enforcement here.
5173         * Tests below maintain the integrity of the local security model.
5174         */
5175        if (vfs_authopaque(vnode_mount(dvp)))
5176            goto out;
5177
5178        /*
5179         * We need to know if the caller is the superuser, or if the work is
5180         * otherwise already authorised.
5181         */
5182        cred = vfs_context_ucred(ctx);
5183        if (noauth) {
5184                /* doing work for the kernel */
5185                is_suser = 1;
5186        } else {
5187                is_suser = vfs_context_issuser(ctx);
5188        }
5189
5190
5191        if (VATTR_IS_ACTIVE(vap, va_flags)) {
5192                if (is_suser) {
5193                        if ((vap->va_flags & (UF_SETTABLE | SF_SETTABLE)) != vap->va_flags) {
5194                                error = EPERM;
5195                                KAUTH_DEBUG("  DENIED - superuser attempt to set illegal flag(s)");
5196                                goto out;
5197                        }
5198                } else {
5199                        if ((vap->va_flags & UF_SETTABLE) != vap->va_flags) {
5200                                error = EPERM;
5201                                KAUTH_DEBUG("  DENIED - user attempt to set illegal flag(s)");
5202                                goto out;
5203                        }
5204                }
5205        }
5206
5207        /* if not superuser, validate legality of new-item attributes */
5208        if (!is_suser) {
5209                if (!defaulted_mode && VATTR_IS_ACTIVE(vap, va_mode)) {
5210                        /* setgid? */
5211                        if (vap->va_mode & S_ISGID) {
5212                                if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) {
5213                                        KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid);
5214                                        goto out;
5215                                }
5216                                if (!ismember) {
5217                                        KAUTH_DEBUG("  DENIED - can't set SGID bit, not a member of %d", vap->va_gid);
5218                                        error = EPERM;
5219                                        goto out;
5220                                }
5221                        }
5222
5223                        /* setuid? */
5224                        if ((vap->va_mode & S_ISUID) && (vap->va_uid != kauth_cred_getuid(cred))) {
5225                                KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit");
5226                                error = EPERM;
5227                                goto out;
5228                        }
5229                }
5230                if (!defaulted_owner && (vap->va_uid != kauth_cred_getuid(cred))) {
5231                        KAUTH_DEBUG("  DENIED - cannot create new item owned by %d", vap->va_uid);
5232                        error = EPERM;
5233                        goto out;
5234                }
5235                if (!defaulted_group) {
5236                        if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) {
5237                                KAUTH_DEBUG("  ERROR - got %d checking for membership in %d", error, vap->va_gid);
5238                                goto out;
5239                        }
5240                        if (!ismember) {
5241                                KAUTH_DEBUG("  DENIED - cannot create new item with group %d - not a member", vap->va_gid);
5242                                error = EPERM;
5243                                goto out;
5244                        }
5245                }
5246
5247                /* initialising owner/group UUID */
5248                if (VATTR_IS_ACTIVE(vap, va_uuuid)) {
5249                        if ((error = kauth_cred_getguid(cred, &changer)) != 0) {
5250                                KAUTH_DEBUG("  ERROR - got %d trying to get caller UUID", error);
5251                                /* XXX ENOENT here - no GUID - should perhaps become EPERM */
5252                                goto out;
5253                        }
5254                        if (!kauth_guid_equal(&vap->va_uuuid, &changer)) {
5255                                KAUTH_DEBUG("  ERROR - cannot create item with supplied owner UUID - not us");
5256                                error = EPERM;
5257                                goto out;
5258                        }
5259                }
5260                if (VATTR_IS_ACTIVE(vap, va_guuid)) {
5261                        if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) {
5262                                KAUTH_DEBUG("  ERROR - got %d trying to check group membership", error);
5263                                goto out;
5264                        }
5265                        if (!ismember) {
5266                                KAUTH_DEBUG("  ERROR - cannot create item with supplied group UUID - not a member");
5267                                error = EPERM;
5268                                goto out;
5269                        }
5270                }
5271        }
5272out:    
5273        return(error);
5274}
5275
5276/*
5277 * Check that the attribute information in vap can be legally written by the context.
5278 *
5279 * Call this when you're not sure about the vnode_attr; either its contents have come
5280 * from an unknown source, or when they are variable.
5281 *
5282 * Returns errno, or zero and sets *actionp to the KAUTH_VNODE_* actions that
5283 * must be authorized to be permitted to write the vattr.
5284 */
5285int
5286vnode_authattr(vnode_t vp, struct vnode_attr *vap, kauth_action_t *actionp, vfs_context_t ctx)
5287{
5288        struct vnode_attr ova;
5289        kauth_action_t  required_action;
5290        int             error, is_suser, ismember, chowner, chgroup;
5291        guid_t          changer;
5292        gid_t           group;
5293        uid_t           owner;
5294        mode_t          newmode;
5295        kauth_cred_t    cred;
5296        uint32_t        fdelta;
5297
5298        VATTR_INIT(&ova);
5299        required_action = 0;
5300        error = 0;
5301
5302        /*
5303         * Quickly check for enforcement applicability.
5304         */
5305        if (vfs_authopaque(vnode_mount(vp)))
5306                goto out;
5307        
5308        /*
5309         * Check for attempts to set nonsensical fields.
5310         */
5311        if (vap->va_active & VNODE_ATTR_RDONLY) {
5312                KAUTH_DEBUG("ATTR - ERROR: attempt to set readonly attribute(s)");
5313                error = EINVAL;
5314                goto out;
5315        }
5316
5317        /*
5318         * We need to know if the caller is the superuser.
5319         */
5320        cred = vfs_context_ucred(ctx);
5321        is_suser = kauth_cred_issuser(cred);
5322        
5323        /*
5324         * If any of the following are changing, we need information from the old file:
5325         * va_uid
5326         * va_gid
5327         * va_mode
5328         * va_uuuid
5329         * va_guuid
5330         */
5331        if (VATTR_IS_ACTIVE(vap, va_uid) ||
5332            VATTR_IS_ACTIVE(vap, va_gid) ||
5333            VATTR_IS_ACTIVE(vap, va_mode) ||
5334            VATTR_IS_ACTIVE(vap, va_uuuid) ||
5335            VATTR_IS_ACTIVE(vap, va_guuid)) {
5336                VATTR_WANTED(&ova, va_mode);
5337                VATTR_WANTED(&ova, va_uid);
5338                VATTR_WANTED(&ova, va_gid);
5339                VATTR_WANTED(&ova, va_uuuid);
5340                VATTR_WANTED(&ova, va_guuid);
5341                KAUTH_DEBUG("ATTR - security information changing, fetching existing attributes");
5342        }
5343
5344        /*
5345         * If timestamps are being changed, we need to know who the file is owned
5346         * by.
5347         */
5348        if (VATTR_IS_ACTIVE(vap, va_create_time) ||
5349            VATTR_IS_ACTIVE(vap, va_change_time) ||
5350            VATTR_IS_ACTIVE(vap, va_modify_time) ||
5351            VATTR_IS_ACTIVE(vap, va_access_time) ||
5352            VATTR_IS_ACTIVE(vap, va_backup_time)) {
5353
5354                VATTR_WANTED(&ova, va_uid);
5355#if 0   /* enable this when we support UUIDs as official owners */
5356                VATTR_WANTED(&ova, va_uuuid);
5357#endif
5358                KAUTH_DEBUG("ATTR - timestamps changing, fetching uid and GUID");
5359        }
5360                
5361        /*
5362         * If flags are being changed, we need the old flags.
5363         */
5364        if (VATTR_IS_ACTIVE(vap, va_flags)) {
5365                KAUTH_DEBUG("ATTR - flags changing, fetching old flags");
5366                VATTR_WANTED(&ova, va_flags);
5367        }
5368
5369        /*
5370         * If the size is being set, make sure it's not a directory.
5371         */
5372        if (VATTR_IS_ACTIVE(vap, va_data_size)) {
5373                /* size is meaningless on a directory, don't permit this */
5374                if (vnode_isdir(vp)) {
5375                        KAUTH_DEBUG("ATTR - ERROR: size change requested on a directory");
5376                        error = EISDIR;
5377                        goto out;
5378                }
5379        }
5380
5381        /*
5382         * Get old data.
5383         */
5384        KAUTH_DEBUG("ATTR - fetching old attributes %016llx", ova.va_active);
5385        if ((error = vnode_getattr(vp, &ova, ctx)) != 0) {
5386                KAUTH_DEBUG("  ERROR - got %d trying to get attributes", error);
5387                goto out;
5388        }
5389
5390        /*
5391         * Size changes require write access to the file data.
5392         */
5393        if (VATTR_IS_ACTIVE(vap, va_data_size)) {
5394                /* if we can't get the size, or it's different, we need write access */
5395                        KAUTH_DEBUG("ATTR - size change, requiring WRITE_DATA");
5396                        required_action |= KAUTH_VNODE_WRITE_DATA;
5397        }
5398
5399        /*
5400         * Changing timestamps?
5401         *
5402         * Note that we are only called to authorize user-requested time changes;
5403         * side-effect time changes are not authorized.  Authorisation is only
5404         * required for existing files.
5405         *
5406         * Non-owners are not permitted to change the time on an existing
5407         * file to anything other than the current time.
5408         */
5409        if (VATTR_IS_ACTIVE(vap, va_create_time) ||
5410            VATTR_IS_ACTIVE(vap, va_change_time) ||
5411            VATTR_IS_ACTIVE(vap, va_modify_time) ||
5412            VATTR_IS_ACTIVE(vap, va_access_time) ||
5413            VATTR_IS_ACTIVE(vap, va_backup_time)) {
5414                /*
5415                 * The owner and root may set any timestamps they like,
5416                 * provided that the file is not immutable.  The owner still needs
5417                 * WRITE_ATTRIBUTES (implied by ownership but still deniable).
5418                 */
5419                if (is_suser || vauth_node_owner(&ova, cred)) {
5420                        KAUTH_DEBUG("ATTR - root or owner changing timestamps");
5421                        required_action |= KAUTH_VNODE_CHECKIMMUTABLE | KAUTH_VNODE_WRITE_ATTRIBUTES;
5422                } else {
5423                        /* just setting the current time? */
5424                        if (vap->va_vaflags & VA_UTIMES_NULL) {
5425                                KAUTH_DEBUG("ATTR - non-root/owner changing timestamps, requiring WRITE_ATTRIBUTES");
5426                                required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES;
5427                        } else {
5428                                KAUTH_DEBUG("ATTR - ERROR: illegal timestamp modification attempted");
5429                                error = EACCES;
5430                                goto out;
5431                        }
5432                }
5433        }
5434
5435        /*
5436         * Changing file mode?
5437         */
5438        if (VATTR_IS_ACTIVE(vap, va_mode) && VATTR_IS_SUPPORTED(&ova, va_mode) && (ova.va_mode != vap->va_mode)) {
5439                KAUTH_DEBUG("ATTR - mode change from %06o to %06o", ova.va_mode, vap->va_mode);
5440
5441                /*
5442                 * Mode changes always have the same basic auth requirements.
5443                 */
5444                if (is_suser) {
5445                        KAUTH_DEBUG("ATTR - superuser mode change, requiring immutability check");
5446                        required_action |= KAUTH_VNODE_CHECKIMMUTABLE;
5447                } else {
5448                        /* need WRITE_SECURITY */
5449                        KAUTH_DEBUG("ATTR - non-superuser mode change, requiring WRITE_SECURITY");
5450                        required_action |= KAUTH_VNODE_WRITE_SECURITY;
5451                }
5452
5453                /*
5454                 * Can't set the setgid bit if you're not in the group and not root.  Have to have
5455                 * existing group information in the case we're not setting it right now.
5456                 */
5457                if (vap->va_mode & S_ISGID) {
5458                        required_action |= KAUTH_VNODE_CHECKIMMUTABLE;  /* always required */
5459                        if (!is_suser) {
5460                                if (VATTR_IS_ACTIVE(vap, va_gid)) {
5461                                        group = vap->va_gid;
5462                                } else if (VATTR_IS_SUPPORTED(&ova, va_gid)) {
5463                                        group = ova.va_gid;
5464                                } else {
5465                                        KAUTH_DEBUG("ATTR - ERROR: setgid but no gid available");
5466                                        error = EINVAL;
5467                                        goto out;
5468                                }
5469                                /*
5470                                 * This might be too restrictive; WRITE_SECURITY might be implied by
5471                                 * membership in this case, rather than being an additional requirement.
5472                                 */
5473                                if ((error = kauth_cred_ismember_gid(cred, group, &ismember)) != 0) {
5474                                        KAUTH_DEBUG("ATTR - ERROR: got %d checking for membership in %d", error, vap->va_gid);
5475                                        goto out;
5476                                }
5477                                if (!ismember) {
5478                                        KAUTH_DEBUG("  DENIED - can't set SGID bit, not a member of %d", group);
5479                                        error = EPERM;
5480                                        goto out;
5481                                }
5482                        }
5483                }
5484
5485                /*
5486                 * Can't set the setuid bit unless you're root or the file's owner.
5487                 */
5488                if (vap->va_mode & S_ISUID) {
5489                        required_action |= KAUTH_VNODE_CHECKIMMUTABLE;  /* always required */
5490                        if (!is_suser) {
5491                                if (VATTR_IS_ACTIVE(vap, va_uid)) {
5492                                        owner = vap->va_uid;
5493                                } else if (VATTR_IS_SUPPORTED(&ova, va_uid)) {
5494                                        owner = ova.va_uid;
5495                                } else {
5496                                        KAUTH_DEBUG("ATTR - ERROR: setuid but no uid available");
5497                                        error = EINVAL;
5498                                        goto out;
5499                                }
5500                                if (owner != kauth_cred_getuid(cred)) {
5501                                        /*
5502                                         * We could allow this if WRITE_SECURITY is permitted, perhaps.
5503                                         */
5504                                        KAUTH_DEBUG("ATTR - ERROR: illegal attempt to set the setuid bit");
5505                                        error = EPERM;
5506                                        goto out;
5507                                }
5508                        }
5509                }
5510        }
5511            
5512        /*
5513         * Validate/mask flags changes.  This checks that only the flags in
5514         * the UF_SETTABLE mask are being set, and preserves the flags in
5515         * the SF_SETTABLE case.
5516         *
5517         * Since flags changes may be made in conjunction with other changes,
5518         * we will ask the auth code to ignore immutability in the case that
5519         * the SF_* flags are not set and we are only manipulating the file flags.
5520         * 
5521         */
5522        if (VATTR_IS_ACTIVE(vap, va_flags)) {
5523                /* compute changing flags bits */
5524                if (VATTR_IS_SUPPORTED(&ova, va_flags)) {
5525                        fdelta = vap->va_flags ^ ova.va_flags;
5526                } else {
5527                        fdelta = vap->va_flags;
5528                }
5529
5530                if (fdelta != 0) {
5531                        KAUTH_DEBUG("ATTR - flags changing, requiring WRITE_SECURITY");
5532                        required_action |= KAUTH_VNODE_WRITE_SECURITY;
5533
5534                        /* check that changing bits are legal */
5535                        if (is_suser) {
5536                                /*
5537                                 * The immutability check will prevent us from clearing the SF_*
5538                                 * flags unless the system securelevel permits it, so just check
5539                                 * for legal flags here.
5540                                 */
5541                                if (fdelta & ~(UF_SETTABLE | SF_SETTABLE)) {
5542                                        error = EPERM;
5543                                        KAUTH_DEBUG("  DENIED - superuser attempt to set illegal flag(s)");
5544                                        goto out;
5545                                }
5546                        } else {
5547                                if (fdelta & ~UF_SETTABLE) {
5548                                        error = EPERM;
5549                                        KAUTH_DEBUG("  DENIED - user attempt to set illegal flag(s)");
5550                                        goto out;
5551                                }
5552                        }
5553                        /*
5554                         * If the caller has the ability to manipulate file flags,
5555                         * security is not reduced by ignoring them for this operation.
5556                         *
5557                         * A more complete test here would consider the 'after' states of the flags
5558                         * to determine whether it would permit the operation, but this becomes
5559                         * very complex.
5560                         *
5561                         * Ignoring immutability is conditional on securelevel; this does not bypass
5562                         * the SF_* flags if securelevel > 0.
5563                         */
5564                        required_action |= KAUTH_VNODE_NOIMMUTABLE;
5565                }
5566        }
5567
5568        /*
5569         * Validate ownership information.
5570         */
5571        chowner = 0;
5572        chgroup = 0;
5573
5574        /*
5575         * uid changing
5576         * Note that if the filesystem didn't give us a UID, we expect that it doesn't
5577         * support them in general, and will ignore it if/when we try to set it.
5578         * We might want to clear the uid out of vap completely here.
5579         */
5580        if (VATTR_IS_ACTIVE(vap, va_uid) && VATTR_IS_SUPPORTED(&ova, va_uid) && (vap->va_uid != ova.va_uid)) {
5581                if (!is_suser && (kauth_cred_getuid(cred) != vap->va_uid)) {
5582                        KAUTH_DEBUG("  DENIED - non-superuser cannot change ownershipt to a third party");
5583                        error = EPERM;
5584                        goto out;
5585                }
5586                chowner = 1;
5587        }
5588        
5589        /*
5590         * gid changing
5591         * Note that if the filesystem didn't give us a GID, we expect that it doesn't
5592         * support them in general, and will ignore it if/when we try to set it.
5593         * We might want to clear the gid out of vap completely here.
5594         */
5595        if (VATTR_IS_ACTIVE(vap, va_gid) && VATTR_IS_SUPPORTED(&ova, va_gid) && (vap->va_gid != ova.va_gid)) {
5596                if (!is_suser) {
5597                        if ((error = kauth_cred_ismember_gid(cred, vap->va_gid, &ismember)) != 0) {
5598                                KAUTH_DEBUG("  ERROR - got %d checking for membership in %d", error, vap->va_gid);
5599                                goto out;
5600                        }
5601                        if (!ismember) {
5602                                KAUTH_DEBUG("  DENIED - group change from %d to %d but not a member of target group",
5603                                    ova.va_gid, vap->va_gid);
5604                                error = EPERM;
5605                                goto out;
5606                        }
5607                }
5608                chgroup = 1;
5609        }
5610
5611        /*
5612         * Owner UUID being set or changed.
5613         */
5614        if (VATTR_IS_ACTIVE(vap, va_uuuid)) {
5615                /* if the owner UUID is not actually changing ... */
5616                if (VATTR_IS_SUPPORTED(&ova, va_uuuid) && kauth_guid_equal(&vap->va_uuuid, &ova.va_uuuid))
5617                        goto no_uuuid_change;
5618                
5619                /*
5620                 * The owner UUID cannot be set by a non-superuser to anything other than
5621                 * their own.
5622                 */
5623                if (!is_suser) {
5624                        if ((error = kauth_cred_getguid(cred, &changer)) != 0) {
5625                                KAUTH_DEBUG("  ERROR - got %d trying to get caller UUID", error);
5626                                /* XXX ENOENT here - no UUID - should perhaps become EPERM */
5627                                goto out;
5628                        }
5629                        if (!kauth_guid_equal(&vap->va_uuuid, &changer)) {
5630                                KAUTH_DEBUG("  ERROR - cannot set supplied owner UUID - not us");
5631                                error = EPERM;
5632                                goto out;
5633                        }
5634                }
5635                chowner = 1;
5636        }
5637no_uuuid_change:
5638        /*
5639         * Group UUID being set or changed.
5640         */
5641        if (VATTR_IS_ACTIVE(vap, va_guuid)) {
5642                /* if the group UUID is not actually changing ... */
5643                if (VATTR_IS_SUPPORTED(&ova, va_guuid) && kauth_guid_equal(&vap->va_guuid, &ova.va_guuid))
5644                        goto no_guuid_change;
5645
5646                /*
5647                 * The group UUID cannot be set by a non-superuser to anything other than
5648                 * one of which they are a member.
5649                 */
5650                if (!is_suser) {
5651                        if ((error = kauth_cred_ismember_guid(cred, &vap->va_guuid, &ismember)) != 0) {
5652                                KAUTH_DEBUG("  ERROR - got %d trying to check group membership", error);
5653                                goto out;
5654                        }
5655                        if (!ismember) {
5656                                KAUTH_DEBUG("  ERROR - cannot create item with supplied group UUID - not a member");
5657                                error = EPERM;
5658                                goto out;
5659                        }
5660                }
5661                chgroup = 1;
5662        }
5663no_guuid_change:
5664
5665        /*
5666         * Compute authorisation for group/ownership changes.
5667         */
5668        if (chowner || chgroup) {
5669                if (is_suser) {
5670                        KAUTH_DEBUG("ATTR - superuser changing file owner/group, requiring immutability check");
5671                        required_action |= KAUTH_VNODE_CHECKIMMUTABLE;
5672                } else {
5673                        if (chowner) {
5674                                KAUTH_DEBUG("ATTR - ownership change, requiring TAKE_OWNERSHIP");
5675                                required_action |= KAUTH_VNODE_TAKE_OWNERSHIP;
5676                        }
5677                        if (chgroup && !chowner) {
5678                                KAUTH_DEBUG("ATTR - group change, requiring WRITE_SECURITY");
5679                                required_action |= KAUTH_VNODE_WRITE_SECURITY;
5680                        }
5681                        
5682                        /* clear set-uid and set-gid bits as required by Posix */
5683                        if (VATTR_IS_ACTIVE(vap, va_mode)) {
5684                                newmode = vap->va_mode;
5685                        } else if (VATTR_IS_SUPPORTED(&ova, va_mode)) {
5686                                newmode = ova.va_mode;
5687                        } else {
5688                                KAUTH_DEBUG("CHOWN - trying to change owner but cannot get mode from filesystem to mask setugid bits");
5689                                newmode = 0;
5690                        }
5691                        if (newmode & (S_ISUID | S_ISGID)) {
5692                                VATTR_SET(vap, va_mode, newmode & ~(S_ISUID | S_ISGID));
5693                                KAUTH_DEBUG("CHOWN - masking setugid bits from mode %o to %o", newmode, vap->va_mode);
5694                        }
5695                }
5696        }
5697
5698        /*
5699         * Authorise changes in the ACL.
5700         */
5701        if (VATTR_IS_ACTIVE(vap, va_acl)) {
5702
5703                /* no existing ACL */
5704                if (!VATTR_IS_ACTIVE(&ova, va_acl) || (ova.va_acl == NULL)) {
5705
5706                        /* adding an ACL */
5707                        if (vap->va_acl != NULL) {
5708                                required_action |= KAUTH_VNODE_WRITE_SECURITY;
5709                                KAUTH_DEBUG("CHMOD - adding ACL");
5710                        }
5711
5712                        /* removing an existing ACL */
5713                } else if (vap->va_acl == NULL) {
5714                        required_action |= KAUTH_VNODE_WRITE_SECURITY;
5715                        KAUTH_DEBUG("CHMOD - removing ACL");
5716
5717                        /* updating an existing ACL */
5718                } else {
5719                        if (vap->va_acl->acl_entrycount != ova.va_acl->acl_entrycount) {
5720                                /* entry count changed, must be different */
5721                                required_action |= KAUTH_VNODE_WRITE_SECURITY;
5722                                KAUTH_DEBUG("CHMOD - adding/removing ACL entries");
5723                        } else if (vap->va_acl->acl_entrycount > 0) {
5724                                /* both ACLs have the same ACE count, said count is 1 or more, bitwise compare ACLs */
5725                                if (!memcmp(&vap->va_acl->acl_ace[0], &ova.va_acl->acl_ace[0],
5726                                        sizeof(struct kauth_ace) * vap->va_acl->acl_entrycount)) {
5727                                        required_action |= KAUTH_VNODE_WRITE_SECURITY;
5728                                        KAUTH_DEBUG("CHMOD - changing ACL entries");
5729                                }
5730                        }
5731                }
5732        }
5733
5734        /*
5735         * Other attributes that require authorisation.
5736         */
5737        if (VATTR_IS_ACTIVE(vap, va_encoding))
5738                required_action |= KAUTH_VNODE_WRITE_ATTRIBUTES;
5739        
5740out:
5741        if (VATTR_IS_SUPPORTED(&ova, va_acl) && (ova.va_acl != NULL))
5742                kauth_acl_free(ova.va_acl);
5743        if (error == 0)
5744                *actionp = required_action;
5745        return(error);
5746}
5747
5748
5749void
5750vfs_setlocklocal(mount_t mp)
5751{
5752        vnode_t vp;
5753        
5754        mount_lock(mp);
5755        mp->mnt_kern_flag |= MNTK_LOCK_LOCAL;
5756
5757        /*
5758         * We do not expect anyone to be using any vnodes at the
5759         * time this routine is called. So no need for vnode locking 
5760         */
5761        TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
5762                        vp->v_flag |= VLOCKLOCAL;
5763        }
5764        TAILQ_FOREACH(vp, &mp->mnt_workerqueue, v_mntvnodes) {
5765                        vp->v_flag |= VLOCKLOCAL;
5766        }
5767        TAILQ_FOREACH(vp, &mp->mnt_newvnodes, v_mntvnodes) {
5768                        vp->v_flag |= VLOCKLOCAL;
5769        }
5770        mount_unlock(mp);
5771}
5772
5773
5774#ifdef JOE_DEBUG
5775
5776record_vp(vnode_t vp, int count) {
5777        struct uthread *ut;
5778        int  i;
5779
5780        if ((vp->v_flag & VSYSTEM))
5781                return;
5782
5783        ut = get_bsdthread_info(current_thread());
5784        ut->uu_iocount += count;
5785
5786        if (ut->uu_vpindex < 32) {
5787                for (i = 0; i < ut->uu_vpindex; i++) {
5788                        if (ut->uu_vps[i] == vp)
5789                                return;
5790                }
5791                ut->uu_vps[ut->uu_vpindex] = vp;
5792                ut->uu_vpindex++;
5793        }
5794}
5795#endif
5796
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.